snapsearch-client-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +20 -0
- data/README.md +109 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/examples/rack/Gemfile +5 -0
- data/examples/rack/config.ru +88 -0
- data/examples/rack/public/index.html +15 -0
- data/examples/sinatra/Gemfile +5 -0
- data/examples/sinatra/Gemfile.lock +90 -0
- data/examples/sinatra/config.ru +15 -0
- data/examples/sinatra/lib/sinatra_snap_search.rb +19 -0
- data/examples/sinatra/public/index.html +15 -0
- data/lib/rack/snap_search.rb +143 -0
- data/lib/rack/snap_search/config.rb +85 -0
- data/lib/snap_search.rb +14 -0
- data/lib/snap_search/client.rb +147 -0
- data/lib/snap_search/connection_exception.rb +15 -0
- data/lib/snap_search/detector.rb +248 -0
- data/lib/snap_search/exception.rb +8 -0
- data/lib/snap_search/interceptor.rb +66 -0
- data/lib/snap_search/validation_exception.rb +17 -0
- data/resources/cacert.pem +3785 -0
- data/resources/extensions.json +26 -0
- data/resources/robots.json +208 -0
- data/snapsearch.gemspec +31 -0
- data/spec/lib/rack/qs_spec.rb +34 -0
- data/spec/lib/rack/snap_search/config_spec.rb +56 -0
- data/spec/lib/snap_search/detector_spec.rb +362 -0
- data/spec/lib/snap_search/interceptor_spec.rb +116 -0
- data/spec/spec_helper.rb +6 -0
- metadata +216 -0
@@ -0,0 +1,26 @@
|
|
1
|
+
{
|
2
|
+
"generic": [
|
3
|
+
"html",
|
4
|
+
"htm",
|
5
|
+
"shtml",
|
6
|
+
"xhtml",
|
7
|
+
"xht",
|
8
|
+
"cgi"
|
9
|
+
],
|
10
|
+
"php": [
|
11
|
+
"php"
|
12
|
+
],
|
13
|
+
"python" : [
|
14
|
+
"py"
|
15
|
+
],
|
16
|
+
"asp": [
|
17
|
+
"asp",
|
18
|
+
"aspx"
|
19
|
+
],
|
20
|
+
"java": [
|
21
|
+
"jsp"
|
22
|
+
],
|
23
|
+
"perl": [
|
24
|
+
"pl"
|
25
|
+
]
|
26
|
+
}
|
@@ -0,0 +1,208 @@
|
|
1
|
+
{
|
2
|
+
"ignore": [
|
3
|
+
"SnapSearch"
|
4
|
+
],
|
5
|
+
"match": [
|
6
|
+
"008",
|
7
|
+
"ABACHOBot",
|
8
|
+
"Accoona-AI-Agent",
|
9
|
+
"AddSugarSpiderBot",
|
10
|
+
"Adsbot-Google",
|
11
|
+
"altavista",
|
12
|
+
"AnyApexBot",
|
13
|
+
"Arachmo",
|
14
|
+
"archiver",
|
15
|
+
"B-l-i-t-z-B-O-T",
|
16
|
+
"Baiduspider",
|
17
|
+
"BecomeBot",
|
18
|
+
"BeslistBot",
|
19
|
+
"BillyBobBot",
|
20
|
+
"Bimbot",
|
21
|
+
"Bingbot",
|
22
|
+
"BingPreview",
|
23
|
+
"bitlybot",
|
24
|
+
"BlitzBOT",
|
25
|
+
"boitho.com-dc",
|
26
|
+
"boitho.com-robot",
|
27
|
+
"borg",
|
28
|
+
"BrowserMob",
|
29
|
+
"btbot",
|
30
|
+
"CatchBot",
|
31
|
+
"Catchpoint bot",
|
32
|
+
"Cerberian Drtrs",
|
33
|
+
"Charlotte",
|
34
|
+
"ConveraCrawler",
|
35
|
+
"cosmos",
|
36
|
+
"Covario IDS",
|
37
|
+
"crawler",
|
38
|
+
"DataparkSearch",
|
39
|
+
"DiamondBot",
|
40
|
+
"Discobot",
|
41
|
+
"Dotbot",
|
42
|
+
"Embedly",
|
43
|
+
"EmeraldShield.com WebBot",
|
44
|
+
"envolk[ITS]spider",
|
45
|
+
"EsperanzaBot",
|
46
|
+
"Exabot",
|
47
|
+
"facebookexternalhit",
|
48
|
+
"FAST-WebCrawler",
|
49
|
+
"FAST Enterprise Crawler",
|
50
|
+
"Feedfetcher",
|
51
|
+
"FDSE robot",
|
52
|
+
"FindLinks",
|
53
|
+
"FurlBot",
|
54
|
+
"FyberSpider",
|
55
|
+
"g2crawler",
|
56
|
+
"Gaisbot",
|
57
|
+
"GalaxyBot",
|
58
|
+
"genieBot",
|
59
|
+
"Gigabot",
|
60
|
+
"Girafabot",
|
61
|
+
"GomezA",
|
62
|
+
"Googlebot",
|
63
|
+
"GurujiBot",
|
64
|
+
"HappyFunBot",
|
65
|
+
"hl_ftien_spider",
|
66
|
+
"Holmes",
|
67
|
+
"htdig",
|
68
|
+
"http client",
|
69
|
+
"HttpMonitor",
|
70
|
+
"iaskspider",
|
71
|
+
"ia_archiver",
|
72
|
+
"iCCrawler",
|
73
|
+
"ichiro",
|
74
|
+
"igdeSpyder",
|
75
|
+
"IRLbot",
|
76
|
+
"IssueCrawler",
|
77
|
+
"Jaxified Bot",
|
78
|
+
"Jyxobot",
|
79
|
+
"KoepaBot",
|
80
|
+
"L.webis",
|
81
|
+
"LapozzBot",
|
82
|
+
"Larbin",
|
83
|
+
"LDSpider",
|
84
|
+
"LexxeBot",
|
85
|
+
"Linguee Bot",
|
86
|
+
"LinkedInBot",
|
87
|
+
"LinkWalker",
|
88
|
+
"lmspider",
|
89
|
+
"lwp-trivial",
|
90
|
+
"lycos",
|
91
|
+
"mabontland",
|
92
|
+
"magpie-crawler",
|
93
|
+
"Mediapartners",
|
94
|
+
"MJ12bot",
|
95
|
+
"Mnogosearch",
|
96
|
+
"mogimogi",
|
97
|
+
"MojeekBot",
|
98
|
+
"Moreoverbot",
|
99
|
+
"Morning Paper",
|
100
|
+
"msnbot",
|
101
|
+
"MSRBot",
|
102
|
+
"MVAClient",
|
103
|
+
"mxbot",
|
104
|
+
"netresearch",
|
105
|
+
"NetResearchServer",
|
106
|
+
"NetSeer Crawler",
|
107
|
+
"NewsGator",
|
108
|
+
"NG-Search",
|
109
|
+
"nicebot",
|
110
|
+
"noxtrumbot",
|
111
|
+
"Nusearch Spider",
|
112
|
+
"NutchCVS",
|
113
|
+
"Nymesis",
|
114
|
+
"obot",
|
115
|
+
"oegp",
|
116
|
+
"omgilibot",
|
117
|
+
"OmniExplorer_Bot",
|
118
|
+
"OOZBOT",
|
119
|
+
"openbot",
|
120
|
+
"Orbiter",
|
121
|
+
"PageBitesHyperBot",
|
122
|
+
"PagePeeker",
|
123
|
+
"Peew",
|
124
|
+
"Pinterest",
|
125
|
+
"polybot",
|
126
|
+
"Pompos",
|
127
|
+
"PostPost",
|
128
|
+
"Psbot",
|
129
|
+
"PycURL",
|
130
|
+
"Qseero",
|
131
|
+
"Radian6",
|
132
|
+
"RAMPyBot",
|
133
|
+
"Read Later",
|
134
|
+
"rogerbot",
|
135
|
+
"RufusBot",
|
136
|
+
"SandCrawler",
|
137
|
+
"SBIder",
|
138
|
+
"scooter",
|
139
|
+
"ScoutJet",
|
140
|
+
"Scrubby",
|
141
|
+
"SearchSight",
|
142
|
+
"Seekbot",
|
143
|
+
"semanticdiscovery",
|
144
|
+
"Sensis Web Crawler",
|
145
|
+
"SEOChat::Bot",
|
146
|
+
"SeznamBot",
|
147
|
+
"Shim-Crawler",
|
148
|
+
"ShopWiki",
|
149
|
+
"Shoula robot",
|
150
|
+
"silk",
|
151
|
+
"Sitebot",
|
152
|
+
"slurp",
|
153
|
+
"Snappy",
|
154
|
+
"sogou spider",
|
155
|
+
"Sosospider",
|
156
|
+
"speedy",
|
157
|
+
"Speedy Spider",
|
158
|
+
"spider",
|
159
|
+
"Sqworm",
|
160
|
+
"StackRambler",
|
161
|
+
"suggybot",
|
162
|
+
"SurveyBot",
|
163
|
+
"SynooBot",
|
164
|
+
"Teoma",
|
165
|
+
"TerrawizBot",
|
166
|
+
"TheSuBot",
|
167
|
+
"Thumbnail.CZ robot",
|
168
|
+
"ThumbShotsBot",
|
169
|
+
"TinEye",
|
170
|
+
"truwoGPS",
|
171
|
+
"TurnitinBot",
|
172
|
+
"TweetedTimes Bot",
|
173
|
+
"TwengaBot",
|
174
|
+
"Twitterbot",
|
175
|
+
"updated",
|
176
|
+
"URL2PNG",
|
177
|
+
"Urlfilebot",
|
178
|
+
"Vagabondo",
|
179
|
+
"voila",
|
180
|
+
"VoilaBot",
|
181
|
+
"Vortex",
|
182
|
+
"voyager",
|
183
|
+
"VYU2",
|
184
|
+
"webcollage",
|
185
|
+
"Websquash.com",
|
186
|
+
"WebThumb",
|
187
|
+
"wf84",
|
188
|
+
"Willow Internet Crawler",
|
189
|
+
"WoFindeIch Robot",
|
190
|
+
"WomlpeFactory",
|
191
|
+
"Xaldon_WebSpider",
|
192
|
+
"yacy",
|
193
|
+
"yahoo",
|
194
|
+
"YandexBot",
|
195
|
+
"YandexImages",
|
196
|
+
"Yasaklibot",
|
197
|
+
"Yeti",
|
198
|
+
"YodaoBot",
|
199
|
+
"yoogliFetchAgent",
|
200
|
+
"YottaaMonitor",
|
201
|
+
"YoudaoBot",
|
202
|
+
"Zao",
|
203
|
+
"Zealbot",
|
204
|
+
"ZooShot",
|
205
|
+
"zspider",
|
206
|
+
"ZyBorg"
|
207
|
+
]
|
208
|
+
}
|
data/snapsearch.gemspec
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
|
3
|
+
# Gem specification for the SnapSearch Ruby client and its Rack middleware.
Gem::Specification.new do |s|

  # Variables
  s.author      = 'Ryan Scott Lewis'
  s.email       = 'roger.qiu@polycademy.com'
  s.name        = 'snapsearch-client-ruby'
  s.summary     = 'Ruby HTTP Client Middleware Libraries for SnapSearch. Search engine optimisation for single page applications.'
  s.homepage    = 'https://github.com/SnapSearch/SnapSearch-Client-Ruby'
  s.license     = 'MIT'

  # Dependencies
  s.add_dependency 'version', '~> 1.0.0'
  s.add_dependency 'httpi', '~> 2.1.0'
  s.add_dependency 'addressable', '~> 2.0.0'
  s.add_dependency 'rack', '~> 1.5.0'
  s.add_development_dependency 'rake', '~> 10.1.1'
  s.add_development_dependency 'rspec', '~> 2.14.1'
  s.add_development_dependency 'guard-rspec', '~> 4.2.5'
  s.add_development_dependency 'guard-yard', '~> 2.1.0'
  s.add_development_dependency 'fuubar', '~> 1.3.2'

  # Programmatically set variables

  # Read the version from the first VERSION* file in the working directory.
  # Only a missing, unreadable or empty file falls back to '0.0.0'; a blanket
  # `rescue` modifier would also hide unrelated programming errors.
  version_file = Pathname.glob('VERSION*').find(&:file?)
  version_text = begin
    version_file ? version_file.read.strip : ''
  rescue SystemCallError
    ''
  end

  s.version       = version_text.empty? ? '0.0.0' : version_text
  s.description   = s.summary
  s.require_paths = ['lib']
  s.files         = Dir['{{Rake,Gem}file{.lock,},README*,VERSION,LICENSE,*.gemspec,{lib,bin,examples,resources,spec,test}/**/*}']
  s.test_files    = Dir['{examples,spec,test}/**/*']

end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
# Checks how Rack::Request splits and decodes a query string that carries an
# `_escaped_fragment_` parameter next to an ordinary key/value pair.
describe 'Rack application QS splitting' do

  # Raw query string: percent-encoded space in the key, plus-encoded space in
  # the value, and a percent-encoded path + query inside the escaped fragment.
  let(:raw_query) { 'key%201=value+1&_escaped_fragment_=%2Fpath2%3Fkey2=value2' }

  # Rack environment for a plain browser GET request carrying `raw_query`.
  let(:rack_env) do
    {
      'REQUEST_METHOD'    => 'GET',
      'REQUEST_SCHEME'    => 'http',
      'HTTP_HOST'         => 'localhost',
      'HTTP_USER_AGENT'   => 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0',
      'SERVER_NAME'       => 'localhost',
      'SERVER_PORT'       => '80',
      'SERVER_PROTOCOL'   => 'HTTP/1.1',
      'GATEWAY_INTERFACE' => 'CGI/1.1',
      'REMOTE_ADDR'       => '::1',
      'DOCUMENT_ROOT'     => 'C:/www',
      'PATH_INFO'         => '/snapsearch/path1',
      'QUERY_STRING'      => raw_query,
      'rack.url_scheme'   => 'http',
      'rack.input'        => StringIO.new
    }
  end

  # The parameters the spec expects once Rack has decoded the query string.
  let(:decoded_params) do
    {
      'key 1'              => 'value 1',
      "_escaped_fragment_" => '/path2?key2=value2'
    }
  end

  subject { Rack::Request.new(rack_env) }

  it 'should decode the key/values when splitting the params of a request' do
    subject.params.should == decoded_params
  end

end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the middleware configuration object: every entry in ATTRIBUTES
# must expose a reader and a writer, and each callback hook must act as a
# setter when given a block and as a getter otherwise.
describe Rack::SnapSearch::Config do

  subject { described_class.new }

  # An empty callback used to exercise the block-accepting hooks.
  let(:callback) { Proc.new {} }

  context 'For each attribute' do

    described_class::ATTRIBUTES.each do |attribute|
      it { should respond_to(attribute) }        # Have a getter method
      it { should respond_to("#{attribute}=") }  # Have a setter method
    end

  end

  # The three hooks share one contract, so the same examples are generated
  # for each of them.
  [:on_exception, :before_intercept, :after_intercept].each do |hook|

    describe "##{hook}" do

      it { should respond_to(hook) }

      it 'should be a setter if a block is given and a getter if not' do
        subject.public_send(hook).should == nil
        # Passing &callback is the same as giving a block with do/end or curly brackets
        subject.public_send(hook, &callback)
        subject.public_send(hook).should == callback
      end

    end

  end

end
|
@@ -0,0 +1,362 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'rack/request'
|
3
|
+
|
4
|
+
# Specs for SnapSearch::Detector: which requests #detect reports as
# interceptable, and how #get_encoded_url / #get_decoded_path rebuild URLs.
describe SnapSearch::Detector do

  # User agent of an ordinary desktop browser.
  let(:browser_user_agent) { 'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0' }

  # User agent of a search engine robot used by most robot fixtures.
  let(:adsbot_user_agent) { 'AdsBot-Google ( http://www.google.com/adsbot.html)' }

  # Builds a fresh Rack environment for a GET request to '/snapsearch/' on
  # http://localhost.
  #
  # @param user_agent [String] value for HTTP_USER_AGENT
  # @param overrides [Hash] entries that replace or extend the defaults
  # @return [Hash] a new environment hash with its own StringIO input
  def rack_env(user_agent, overrides = {})
    {
      'HTTP_HOST'         => 'localhost',
      'HTTP_USER_AGENT'   => user_agent,
      'SERVER_NAME'       => 'localhost',
      'SERVER_PORT'       => '80',
      'REMOTE_ADDR'       => '::1',
      'DOCUMENT_ROOT'     => 'C:/www',
      'GATEWAY_INTERFACE' => 'CGI/1.1',
      'SERVER_PROTOCOL'   => 'HTTP/1.1',
      'REQUEST_METHOD'    => 'GET',
      'QUERY_STRING'      => '',
      'PATH_INFO'         => '/snapsearch/',
      'rack.url_scheme'   => 'http',
      'rack.input'        => StringIO.new
    }.merge(overrides)
  end

  let(:normal_browser) { rack_env(browser_user_agent) }

  let(:search_engine) { rack_env(adsbot_user_agent) }

  let(:snapsearch_robot) { rack_env('SnapSearch') }

  let(:non_get_route) { rack_env(adsbot_user_agent, 'REQUEST_METHOD' => 'POST') }

  let(:ignored_route) { rack_env('Googlebot-Video/1.0', 'PATH_INFO' => '/ignored/') }

  let(:matched_route) do
    rack_env(
      'msnbot/1.1 ( http://search.msn.com/msnbot.htm)',
      'REQUEST_SCHEME' => 'http',
      'PATH_INFO'      => '/matched/'
    )
  end

  # A browser request whose query string is only the bare escaped fragment key.
  let(:basic_escaped_fragment_route) do
    rack_env(
      browser_user_agent,
      'REQUEST_SCHEME' => 'http',
      'QUERY_STRING'   => '_escaped_fragment_',
      'PATH_INFO'      => '/snapsearch'
    )
  end

  # A browser request with a regular parameter plus an encoded escaped fragment.
  let(:escaped_fragment_route) do
    rack_env(
      browser_user_agent,
      'REQUEST_SCHEME' => 'http',
      'QUERY_STRING'   => 'key1=value1&_escaped_fragment_=%2Fpath2%3Fkey2=value2',
      'PATH_INFO'      => '/snapsearch/path1'
    )
  end

  let(:valid_file_extension_route) do
    rack_env(
      adsbot_user_agent,
      'REQUEST_SCHEME' => 'http',
      'PATH_INFO'      => '/snapsearch/song.html?key=value'
    )
  end

  let(:invalid_file_extension_route) do
    rack_env(
      adsbot_user_agent,
      'REQUEST_SCHEME' => 'http',
      'PATH_INFO'      => '/snapsearch/song.html.mp3?key=value'
    )
  end

  let(:nonexistent_file_extension_route) do
    rack_env(adsbot_user_agent, 'REQUEST_SCHEME' => 'http')
  end

  # Same request as the invalid-extension fixture; kept under its own name for
  # the custom-extensions example.
  let(:mp3_file_extension_route) do
    rack_env(
      adsbot_user_agent,
      'REQUEST_SCHEME' => 'http',
      'PATH_INFO'      => '/snapsearch/song.html.mp3?key=value'
    )
  end

  subject { described_class.new }

  describe '#detect' do

    describe 'When a request from a normal browser comes through' do

      let(:request) { Rack::Request.new(normal_browser) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When a request from a search engine robot comes through' do

      let(:request) { Rack::Request.new(search_engine) }

      it('should be intercepted') { subject.detect(request: request).should == true }

    end

    describe 'When a request from a SnapSearch robot comes through' do

      let(:request) { Rack::Request.new(snapsearch_robot) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When a non-GET request comes through' do

      let(:request) { Rack::Request.new(non_get_route) }

      it('should not be intercepted') { subject.detect(request: request).should == false }

    end

    describe 'When an ignored route request comes through' do

      let(:request) { Rack::Request.new(ignored_route) }

      it('should not be intercepted') { subject.detect(ignored_routes: [/^\/ignored/], request: request).should == false }

    end

    describe 'When a non-matched route request comes through' do

      let(:request) { Rack::Request.new(matched_route) }

      it('should not be intercepted') { subject.detect(matched_routes: [/^\/non_matched_route/], request: request).should == false }

    end

    describe 'When a matched route request comes through' do

      let(:request) { Rack::Request.new(matched_route) }

      it('should be intercepted') { subject.detect(matched_routes: [/^\/matched/], request: request).should == true }

    end

    describe 'When an escaped fragmented request comes through' do

      let(:request) { Rack::Request.new(basic_escaped_fragment_route) }

      it('should be intercepted') { subject.detect(request: request).should == true }

    end

    describe 'When other factors allow it and a valid file extension comes through' do

      let(:request) { Rack::Request.new(valid_file_extension_route) }

      it('should be intercepted') { subject.detect(request: request, check_file_extensions: true).should == true }

    end

    describe 'When an invalid file extension comes through' do

      let(:request) { Rack::Request.new(invalid_file_extension_route) }

      it('should not be intercepted') { subject.detect(request: request, check_file_extensions: true).should == false }

    end

    describe 'When other factors allow it and a nonexistent file extension comes through' do

      let(:request) { Rack::Request.new(nonexistent_file_extension_route) }

      it('should be intercepted') { subject.detect(request: request, check_file_extensions: true).should == true }

    end

    describe 'When custom extensions are set' do

      let(:request) { Rack::Request.new(mp3_file_extension_route) }

      it('should be intercepted correctly') do
        # 'mp3' is not in the extension list yet.
        subject.detect(request: request, check_file_extensions: true).should == false

        subject.extensions['generic'] << 'mp3'

        subject.detect(request: request, check_file_extensions: true).should == true

        # Removing the extension again must restore the original result.
        subject.extensions['generic'].delete('mp3')

        subject.detect(request: request, check_file_extensions: true).should == false
      end

    end

  end

  describe '#get_encoded_url' do

    let(:request) { Rack::Request.new(escaped_fragment_route) }
    let(:uri) { Addressable::URI.parse(request.url) }

    it 'should convert the escaped fragment route back to hash fragment' do
      subject.get_encoded_url(request.params, uri).should == 'http://localhost/snapsearch/path1?key1=value1#!/path2?key2=value2'
    end

  end

  describe '#get_decoded_path' do

    let(:uri) { Addressable::URI.parse('http://localhost:8080/some%20path/an%2Fother+path/path1.htm?key1=value%201%3F') }

    it 'should return the decoded path with the correct query and hash fragment' do
      subject.get_decoded_path( {}, uri ).should == '/some path/an/other+path/path1.htm?key1=value 1?'
    end

  end

end
|