snapsearch-client-ruby 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +20 -0
- data/README.md +109 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/examples/rack/Gemfile +5 -0
- data/examples/rack/config.ru +88 -0
- data/examples/rack/public/index.html +15 -0
- data/examples/sinatra/Gemfile +5 -0
- data/examples/sinatra/Gemfile.lock +90 -0
- data/examples/sinatra/config.ru +15 -0
- data/examples/sinatra/lib/sinatra_snap_search.rb +19 -0
- data/examples/sinatra/public/index.html +15 -0
- data/lib/rack/snap_search.rb +143 -0
- data/lib/rack/snap_search/config.rb +85 -0
- data/lib/snap_search.rb +14 -0
- data/lib/snap_search/client.rb +147 -0
- data/lib/snap_search/connection_exception.rb +15 -0
- data/lib/snap_search/detector.rb +248 -0
- data/lib/snap_search/exception.rb +8 -0
- data/lib/snap_search/interceptor.rb +66 -0
- data/lib/snap_search/validation_exception.rb +17 -0
- data/resources/cacert.pem +3785 -0
- data/resources/extensions.json +26 -0
- data/resources/robots.json +208 -0
- data/snapsearch.gemspec +31 -0
- data/spec/lib/rack/qs_spec.rb +34 -0
- data/spec/lib/rack/snap_search/config_spec.rb +56 -0
- data/spec/lib/snap_search/detector_spec.rb +362 -0
- data/spec/lib/snap_search/interceptor_spec.rb +116 -0
- data/spec/spec_helper.rb +6 -0
- metadata +216 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f2959c7bd59e817f2eef37602a4abdc8ae1f1be8
|
4
|
+
data.tar.gz: 3ddaec20074f02943df30adf39de5e0ad60b0ccd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1482fc4b66a72119cb2e26637429d3c48b9561e65ebcb4029fd1b173a60d0b5491aeb23719e7f0666298491e338708c3110ecdee1b0fbbe053b6ff529b520ad6
|
7
|
+
data.tar.gz: 1b847ec7a8e27533a0cde5f39bf0445ac923b3232dd55f85a1676814a73b8ba9dcb2637c48aed327c4cc84474b2b815274572de2ef81f56d5e2b1b9ab64a6387
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
snapsearch-client-ruby (0.1.0)
|
5
|
+
addressable (~> 2.0.0)
|
6
|
+
httpi (~> 2.1.0)
|
7
|
+
rack (~> 1.5.0)
|
8
|
+
version (~> 1.0.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.0.2)
|
14
|
+
celluloid (0.15.2)
|
15
|
+
timers (~> 1.1.0)
|
16
|
+
coderay (1.1.0)
|
17
|
+
diff-lcs (1.2.5)
|
18
|
+
ffi (1.9.3-x86-mingw32)
|
19
|
+
formatador (0.2.4)
|
20
|
+
fuubar (1.3.2)
|
21
|
+
rspec (>= 2.14.0, < 3.1.0)
|
22
|
+
ruby-progressbar (~> 1.3)
|
23
|
+
guard (2.4.0)
|
24
|
+
formatador (>= 0.2.4)
|
25
|
+
listen (~> 2.1)
|
26
|
+
lumberjack (~> 1.0)
|
27
|
+
pry (>= 0.9.12)
|
28
|
+
thor (>= 0.18.1)
|
29
|
+
guard-rspec (4.2.5)
|
30
|
+
guard (~> 2.1)
|
31
|
+
rspec (>= 2.14, < 4.0)
|
32
|
+
guard-yard (2.1.0)
|
33
|
+
guard (>= 1.1.0)
|
34
|
+
yard (>= 0.7.0)
|
35
|
+
httpi (2.1.0)
|
36
|
+
rack
|
37
|
+
rubyntlm (~> 0.3.2)
|
38
|
+
listen (2.4.0)
|
39
|
+
celluloid (>= 0.15.2)
|
40
|
+
rb-fsevent (>= 0.9.3)
|
41
|
+
rb-inotify (>= 0.9)
|
42
|
+
lumberjack (1.0.4)
|
43
|
+
method_source (0.8.2)
|
44
|
+
pry (0.9.12.6-x86-mingw32)
|
45
|
+
coderay (~> 1.0)
|
46
|
+
method_source (~> 0.8)
|
47
|
+
slop (~> 3.4)
|
48
|
+
win32console (~> 1.3)
|
49
|
+
rack (1.5.2)
|
50
|
+
rake (10.1.1)
|
51
|
+
rb-fsevent (0.9.4)
|
52
|
+
rb-inotify (0.9.3)
|
53
|
+
ffi (>= 0.5.0)
|
54
|
+
rspec (2.14.1)
|
55
|
+
rspec-core (~> 2.14.0)
|
56
|
+
rspec-expectations (~> 2.14.0)
|
57
|
+
rspec-mocks (~> 2.14.0)
|
58
|
+
rspec-core (2.14.7)
|
59
|
+
rspec-expectations (2.14.5)
|
60
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
61
|
+
rspec-mocks (2.14.5)
|
62
|
+
ruby-progressbar (1.4.1)
|
63
|
+
rubyntlm (0.3.4)
|
64
|
+
slop (3.4.7)
|
65
|
+
thor (0.18.1)
|
66
|
+
timers (1.1.0)
|
67
|
+
version (1.0.0)
|
68
|
+
win32console (1.3.2-x86-mingw32)
|
69
|
+
yard (0.8.7.3)
|
70
|
+
|
71
|
+
PLATFORMS
|
72
|
+
x86-mingw32
|
73
|
+
|
74
|
+
DEPENDENCIES
|
75
|
+
fuubar (~> 1.3.2)
|
76
|
+
guard-rspec (~> 4.2.5)
|
77
|
+
guard-yard (~> 2.1.0)
|
78
|
+
psych
|
79
|
+
racc
|
80
|
+
rake (~> 10.1.1)
|
81
|
+
rspec (~> 2.14.1)
|
82
|
+
rubysl (~> 2.0)
|
83
|
+
snapsearch-client-ruby!
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 SnapSearch
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
SnapSearch-Client-Ruby
|
2
|
+
======================
|
3
|
+
|
4
|
+
[![Build Status](https://travis-ci.org/SnapSearch/SnapSearch-Client-Ruby.png?branch=master)](https://travis-ci.org/SnapSearch/SnapSearch-Client-Ruby)
|
5
|
+
|
6
|
+
SnapSearch Client Ruby is a Ruby-based, framework-agnostic HTTP client library for SnapSearch (https://snapsearch.io/).
|
7
|
+
|
8
|
+
SnapSearch provides similar libraries in other languages: https://github.com/SnapSearch/Snapsearch-Clients
|
9
|
+
|
10
|
+
Installation
|
11
|
+
------------
|
12
|
+
|
13
|
+
Usage
|
14
|
+
-----
|
15
|
+
|
16
|
+
Development
|
17
|
+
---------
|
18
|
+
|
19
|
+
Get the bundler dependency management tool.
|
20
|
+
|
21
|
+
```
|
22
|
+
gem install bundler
|
23
|
+
```
|
24
|
+
|
25
|
+
Install/update all dependencies:
|
26
|
+
|
27
|
+
```
|
28
|
+
bundle install
|
29
|
+
```
|
30
|
+
|
31
|
+
See all build tasks:
|
32
|
+
|
33
|
+
```
|
34
|
+
bundle exec rake -T
|
35
|
+
```
|
36
|
+
|
37
|
+
Make your changes. Release a new version tag with (see the other `rake version:bump:... etc` tasks):
|
38
|
+
|
39
|
+
```
|
40
|
+
bundle exec rake version:bump
|
41
|
+
```
|
42
|
+
|
43
|
+
Synchronise and push the tag to GitHub:
|
44
|
+
|
45
|
+
```
|
46
|
+
git push
|
47
|
+
git push --tags
|
48
|
+
```
|
49
|
+
|
50
|
+
Create the gem package:
|
51
|
+
|
52
|
+
```
|
53
|
+
bundle exec rake gem
|
54
|
+
```
|
55
|
+
|
56
|
+
Push the gem to RubyGems:
|
57
|
+
|
58
|
+
```
|
59
|
+
gem push pkg/snapsearch-client-ruby-MAJOR.MINOR.PATCH.gem
|
60
|
+
```
|
61
|
+
|
62
|
+
Setting Up the Detector
|
63
|
+
-----------------------
|
64
|
+
|
65
|
+
The `Detector` class detects if the incoming request is coming from a robot or not.
|
66
|
+
|
67
|
+
Detects if the request came from a search engine robot. It will intercept in cascading order:
|
68
|
+
|
69
|
+
1. on a GET request
|
70
|
+
2. on an HTTP or HTTPS protocol
|
71
|
+
3. not on any ignored robot user agents
|
72
|
+
4. not on any route not matching the whitelist
|
73
|
+
5. not on any route matching the blacklist
|
74
|
+
6. not on any static files that are not PHP files, if detected
|
75
|
+
7. on requests with _escaped_fragment_ query parameter
|
76
|
+
8. on any matched robot user agents
|
77
|
+
|
78
|
+
You can customize a few aspects of this process:
|
79
|
+
|
80
|
+
#### User Agents
|
81
|
+
|
82
|
+
Most robots send a unique `user-agent` HTTP header that we match against to confirm whether it is indeed a request from a robot.
|
83
|
+
We also ignore certain user agents, such as the SnapSearch robot.
|
84
|
+
|
85
|
+
The list of user agents to match and ignore is contained in `resources/robots.json`. You can customize this list through the Detector instance
|
86
|
+
you are working with:
|
87
|
+
|
88
|
+
```
|
89
|
+
# Retrieve the list of user agents to match and ignore:
|
90
|
+
detector.robots # => { 'match' => ['SomeRobot', 'AnotherRobot'], 'ignore' => ['SnapSearch'] }
|
91
|
+
|
92
|
+
# Add a user agent to match against:
|
93
|
+
detector.robots['match'] << 'NewRobot'
|
94
|
+
|
95
|
+
# Add a user agent to ignore:
|
96
|
+
detector.robots['ignore'] << 'MyRobot'
|
97
|
+
|
98
|
+
# Set a new list of user agents to match and ignore:
|
99
|
+
detector.robots = { 'match' => ['WebScraper', 'SillyBot'], 'ignore' => ['MyBotToIgnore'] }
|
100
|
+
|
101
|
+
# Load from a custom JSON file:
|
102
|
+
detector.robots_json = './my_robots.json'
|
103
|
+
detector.robots # => { 'match' => ['MyCustomBot', 'AnotherRobot'], 'ignore' => ['MyLoadedBotFromJSON'] }
|
104
|
+
```
|
105
|
+
|
106
|
+
Tests
|
107
|
+
----
|
108
|
+
|
109
|
+
Tests are written with RSpec. Run tests with `bundle exec rspec spec/`
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require 'pathname'
|
3
|
+
require 'rake/version_task'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
|
7
|
+
gemspec = Pathname.glob( Pathname.new(__FILE__).join('..', '*.gemspec') ).first
|
8
|
+
$spec = Gem::Specification.load( gemspec.to_s )
|
9
|
+
|
10
|
+
Gem::PackageTask.new($spec) do |task|
|
11
|
+
task.need_zip = false
|
12
|
+
end
|
13
|
+
|
14
|
+
Rake::VersionTask.new do |task|
|
15
|
+
task.with_git_tag = true
|
16
|
+
end
|
17
|
+
|
18
|
+
RSpec::Core::RakeTask.new(:spec)
|
19
|
+
|
20
|
+
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# Notes to run:
|
2
|
+
# gem install bundler
|
3
|
+
# bundle install
|
4
|
+
# rackup
|
5
|
+
#
|
6
|
+
# Testing:
|
7
|
+
# Visit http://localhost:9292/
|
8
|
+
# Visit http://localhost:9292/?_escaped_fragment_
|
9
|
+
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'rack/snap_search'
|
12
|
+
|
13
|
+
use Rack::Static, urls: ['/img', '/js', '/css'], root: 'public'
|
14
|
+
|
15
|
+
use Rack::SnapSearch do |config|
|
16
|
+
|
17
|
+
# Required: The email to authenticate with.
|
18
|
+
config.email = 'user@example.com'
|
19
|
+
|
20
|
+
# Required: The key to authenticate with.
|
21
|
+
config.key = 'API_KEY_HERE'
|
22
|
+
|
23
|
+
# Optional: The API URL to send requests to.
|
24
|
+
config.api_url = 'https://snapsearch.io/api/v1/robot' # Default
|
25
|
+
|
26
|
+
# Optional: The CA Cert file to use when sending HTTPS requests to the API.
|
27
|
+
config.ca_cert_file = SnapSearch.root.join('resources', 'cacert.pem') # Default
|
28
|
+
|
29
|
+
# Optional: Check X-Forwarded-Proto because Heroku SSL Support terminates at the load balancer.
|
30
|
+
config.x_forwarded_proto = true # Default
|
31
|
+
|
32
|
+
# Optional: Extra parameters to send to the API.
|
33
|
+
config.parameters = {} # Default
|
34
|
+
|
35
|
+
# Optional: Whitelisted routes. Should be an Array of Regexp instances.
|
36
|
+
config.matched_routes = [] # Default
|
37
|
+
|
38
|
+
# Optional: Blacklisted routes. Should be an Array of Regexp instances.
|
39
|
+
config.ignored_routes = [] # Default
|
40
|
+
|
41
|
+
# Optional: A path of the JSON file containing the user agent whitelist & blacklist.
|
42
|
+
config.robots_json = SnapSearch.root.join('resources', 'robots.json') # Default
|
43
|
+
|
44
|
+
# Optional: A path to the JSON file containing a single Hash with the keys `ignore` and `match`. These keys contain Arrays of Strings (user agents)
|
45
|
+
config.extensions_json = SnapSearch.root.join('resources', 'extensions.json') # Default
|
46
|
+
|
47
|
+
# Optional: Set to `true` to ignore direct requests to files.
|
48
|
+
config.check_static_files = false # Default
|
49
|
+
|
50
|
+
# Optional: A block to run when an exception occurs when making requests to the API.
|
51
|
+
config.on_exception do |exception|
|
52
|
+
p exception
|
53
|
+
end
|
54
|
+
|
55
|
+
# Optional: A block to run before the interception of a bot.
|
56
|
+
config.before_intercept do |url|
|
57
|
+
puts "Before interception\n URL: #{url}"
|
58
|
+
end
|
59
|
+
|
60
|
+
# Optional: A block to run after the interception of a bot.
|
61
|
+
config.after_intercept do |url, response|
|
62
|
+
puts "After interception\n URL: #{url}\n Response: #{response}"
|
63
|
+
end
|
64
|
+
|
65
|
+
# Optional: A block to manipulate the response from the SnapSearch API if a bot is intercepted.
|
66
|
+
config.response_callback do |status, headers, body|
|
67
|
+
puts "Response callback\n Status: #{status}\n Headers: #{headers}\n Body: #{body}"
|
68
|
+
|
69
|
+
[ status, headers, body ]
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
class Application
|
75
|
+
|
76
|
+
def call(env)
|
77
|
+
headers = {
|
78
|
+
'Content-Type' => 'text/html',
|
79
|
+
'Cache-Control' => 'public, max-age=86400'
|
80
|
+
}
|
81
|
+
body = File.read('public/index.html')
|
82
|
+
|
83
|
+
[ 200, headers, [body] ]
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
run Application.new
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang='en'>
|
3
|
+
<head>
|
4
|
+
<meta charset='utf-8'>
|
5
|
+
<title>SnapSearch Example</title>
|
6
|
+
<link href="/css/blah.css" media="all" rel="stylesheet" />
|
7
|
+
<!--[if lt IE 9]>
|
8
|
+
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
9
|
+
<![endif]-->
|
10
|
+
<!-- <script src="/js/blah.js"></script> -->
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<h1>SnapSearch Example</h1>
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,90 @@
|
|
1
|
+
PATH
|
2
|
+
remote: /Users/ryguy/Drive/Code/Ruby/Work/Polycadamy/SnapSearch-Client-Ruby
|
3
|
+
specs:
|
4
|
+
snapsearch-client-ruby (0.0.3)
|
5
|
+
addressable (~> 2.0.0)
|
6
|
+
httpi (~> 2.1.0)
|
7
|
+
version (~> 1.0.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
addressable (2.0.2)
|
13
|
+
celluloid (0.15.2)
|
14
|
+
timers (~> 1.1.0)
|
15
|
+
celluloid-io (0.15.0)
|
16
|
+
celluloid (>= 0.15.0)
|
17
|
+
nio4r (>= 0.5.0)
|
18
|
+
coderay (1.1.0)
|
19
|
+
diff-lcs (1.2.5)
|
20
|
+
ffi (1.9.3)
|
21
|
+
formatador (0.2.4)
|
22
|
+
fuubar (1.3.2)
|
23
|
+
rspec (>= 2.14.0, < 3.1.0)
|
24
|
+
ruby-progressbar (~> 1.3)
|
25
|
+
guard (2.4.0)
|
26
|
+
formatador (>= 0.2.4)
|
27
|
+
listen (~> 2.1)
|
28
|
+
lumberjack (~> 1.0)
|
29
|
+
pry (>= 0.9.12)
|
30
|
+
thor (>= 0.18.1)
|
31
|
+
guard-rspec (4.2.5)
|
32
|
+
guard (~> 2.1)
|
33
|
+
rspec (>= 2.14, < 4.0)
|
34
|
+
guard-yard (2.1.0)
|
35
|
+
guard (>= 1.1.0)
|
36
|
+
yard (>= 0.7.0)
|
37
|
+
httpi (2.1.0)
|
38
|
+
rack
|
39
|
+
rubyntlm (~> 0.3.2)
|
40
|
+
listen (2.5.0)
|
41
|
+
celluloid (>= 0.15.2)
|
42
|
+
celluloid-io (>= 0.15.0)
|
43
|
+
rb-fsevent (>= 0.9.3)
|
44
|
+
rb-inotify (>= 0.9)
|
45
|
+
lumberjack (1.0.4)
|
46
|
+
method_source (0.8.2)
|
47
|
+
nio4r (1.0.0)
|
48
|
+
pry (0.9.12.6)
|
49
|
+
coderay (~> 1.0)
|
50
|
+
method_source (~> 0.8)
|
51
|
+
slop (~> 3.4)
|
52
|
+
rack (1.5.2)
|
53
|
+
rack-protection (1.5.2)
|
54
|
+
rack
|
55
|
+
rake (10.1.1)
|
56
|
+
rb-fsevent (0.9.4)
|
57
|
+
rb-inotify (0.9.3)
|
58
|
+
ffi (>= 0.5.0)
|
59
|
+
rspec (2.14.1)
|
60
|
+
rspec-core (~> 2.14.0)
|
61
|
+
rspec-expectations (~> 2.14.0)
|
62
|
+
rspec-mocks (~> 2.14.0)
|
63
|
+
rspec-core (2.14.7)
|
64
|
+
rspec-expectations (2.14.5)
|
65
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
66
|
+
rspec-mocks (2.14.5)
|
67
|
+
ruby-progressbar (1.4.1)
|
68
|
+
rubyntlm (0.3.4)
|
69
|
+
sinatra (1.4.4)
|
70
|
+
rack (~> 1.4)
|
71
|
+
rack-protection (~> 1.4)
|
72
|
+
tilt (~> 1.3, >= 1.3.4)
|
73
|
+
slop (3.4.7)
|
74
|
+
thor (0.18.1)
|
75
|
+
tilt (1.4.1)
|
76
|
+
timers (1.1.0)
|
77
|
+
version (1.0.0)
|
78
|
+
yard (0.8.7.3)
|
79
|
+
|
80
|
+
PLATFORMS
|
81
|
+
ruby
|
82
|
+
|
83
|
+
DEPENDENCIES
|
84
|
+
fuubar (~> 1.3.2)
|
85
|
+
guard-rspec (~> 4.2.5)
|
86
|
+
guard-yard (~> 2.1.0)
|
87
|
+
rake (~> 10.1.1)
|
88
|
+
rspec (~> 2.14.1)
|
89
|
+
sinatra
|
90
|
+
snapsearch-client-ruby!
|