snapsearch-client-ruby 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +9 -0
- data/Gemfile.lock +83 -0
- data/LICENSE +20 -0
- data/README.md +109 -0
- data/Rakefile +20 -0
- data/VERSION +1 -0
- data/examples/rack/Gemfile +5 -0
- data/examples/rack/config.ru +88 -0
- data/examples/rack/public/index.html +15 -0
- data/examples/sinatra/Gemfile +5 -0
- data/examples/sinatra/Gemfile.lock +90 -0
- data/examples/sinatra/config.ru +15 -0
- data/examples/sinatra/lib/sinatra_snap_search.rb +19 -0
- data/examples/sinatra/public/index.html +15 -0
- data/lib/rack/snap_search.rb +143 -0
- data/lib/rack/snap_search/config.rb +85 -0
- data/lib/snap_search.rb +14 -0
- data/lib/snap_search/client.rb +147 -0
- data/lib/snap_search/connection_exception.rb +15 -0
- data/lib/snap_search/detector.rb +248 -0
- data/lib/snap_search/exception.rb +8 -0
- data/lib/snap_search/interceptor.rb +66 -0
- data/lib/snap_search/validation_exception.rb +17 -0
- data/resources/cacert.pem +3785 -0
- data/resources/extensions.json +26 -0
- data/resources/robots.json +208 -0
- data/snapsearch.gemspec +31 -0
- data/spec/lib/rack/qs_spec.rb +34 -0
- data/spec/lib/rack/snap_search/config_spec.rb +56 -0
- data/spec/lib/snap_search/detector_spec.rb +362 -0
- data/spec/lib/snap_search/interceptor_spec.rb +116 -0
- data/spec/spec_helper.rb +6 -0
- metadata +216 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: f2959c7bd59e817f2eef37602a4abdc8ae1f1be8
|
4
|
+
data.tar.gz: 3ddaec20074f02943df30adf39de5e0ad60b0ccd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1482fc4b66a72119cb2e26637429d3c48b9561e65ebcb4029fd1b173a60d0b5491aeb23719e7f0666298491e338708c3110ecdee1b0fbbe053b6ff529b520ad6
|
7
|
+
data.tar.gz: 1b847ec7a8e27533a0cde5f39bf0445ac923b3232dd55f85a1676814a73b8ba9dcb2637c48aed327c4cc84474b2b815274572de2ef81f56d5e2b1b9ab64a6387
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
snapsearch-client-ruby (0.1.0)
|
5
|
+
addressable (~> 2.0.0)
|
6
|
+
httpi (~> 2.1.0)
|
7
|
+
rack (~> 1.5.0)
|
8
|
+
version (~> 1.0.0)
|
9
|
+
|
10
|
+
GEM
|
11
|
+
remote: https://rubygems.org/
|
12
|
+
specs:
|
13
|
+
addressable (2.0.2)
|
14
|
+
celluloid (0.15.2)
|
15
|
+
timers (~> 1.1.0)
|
16
|
+
coderay (1.1.0)
|
17
|
+
diff-lcs (1.2.5)
|
18
|
+
ffi (1.9.3-x86-mingw32)
|
19
|
+
formatador (0.2.4)
|
20
|
+
fuubar (1.3.2)
|
21
|
+
rspec (>= 2.14.0, < 3.1.0)
|
22
|
+
ruby-progressbar (~> 1.3)
|
23
|
+
guard (2.4.0)
|
24
|
+
formatador (>= 0.2.4)
|
25
|
+
listen (~> 2.1)
|
26
|
+
lumberjack (~> 1.0)
|
27
|
+
pry (>= 0.9.12)
|
28
|
+
thor (>= 0.18.1)
|
29
|
+
guard-rspec (4.2.5)
|
30
|
+
guard (~> 2.1)
|
31
|
+
rspec (>= 2.14, < 4.0)
|
32
|
+
guard-yard (2.1.0)
|
33
|
+
guard (>= 1.1.0)
|
34
|
+
yard (>= 0.7.0)
|
35
|
+
httpi (2.1.0)
|
36
|
+
rack
|
37
|
+
rubyntlm (~> 0.3.2)
|
38
|
+
listen (2.4.0)
|
39
|
+
celluloid (>= 0.15.2)
|
40
|
+
rb-fsevent (>= 0.9.3)
|
41
|
+
rb-inotify (>= 0.9)
|
42
|
+
lumberjack (1.0.4)
|
43
|
+
method_source (0.8.2)
|
44
|
+
pry (0.9.12.6-x86-mingw32)
|
45
|
+
coderay (~> 1.0)
|
46
|
+
method_source (~> 0.8)
|
47
|
+
slop (~> 3.4)
|
48
|
+
win32console (~> 1.3)
|
49
|
+
rack (1.5.2)
|
50
|
+
rake (10.1.1)
|
51
|
+
rb-fsevent (0.9.4)
|
52
|
+
rb-inotify (0.9.3)
|
53
|
+
ffi (>= 0.5.0)
|
54
|
+
rspec (2.14.1)
|
55
|
+
rspec-core (~> 2.14.0)
|
56
|
+
rspec-expectations (~> 2.14.0)
|
57
|
+
rspec-mocks (~> 2.14.0)
|
58
|
+
rspec-core (2.14.7)
|
59
|
+
rspec-expectations (2.14.5)
|
60
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
61
|
+
rspec-mocks (2.14.5)
|
62
|
+
ruby-progressbar (1.4.1)
|
63
|
+
rubyntlm (0.3.4)
|
64
|
+
slop (3.4.7)
|
65
|
+
thor (0.18.1)
|
66
|
+
timers (1.1.0)
|
67
|
+
version (1.0.0)
|
68
|
+
win32console (1.3.2-x86-mingw32)
|
69
|
+
yard (0.8.7.3)
|
70
|
+
|
71
|
+
PLATFORMS
|
72
|
+
x86-mingw32
|
73
|
+
|
74
|
+
DEPENDENCIES
|
75
|
+
fuubar (~> 1.3.2)
|
76
|
+
guard-rspec (~> 4.2.5)
|
77
|
+
guard-yard (~> 2.1.0)
|
78
|
+
psych
|
79
|
+
racc
|
80
|
+
rake (~> 10.1.1)
|
81
|
+
rspec (~> 2.14.1)
|
82
|
+
rubysl (~> 2.0)
|
83
|
+
snapsearch-client-ruby!
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2014 SnapSearch
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,109 @@
|
|
1
|
+
SnapSearch-Client-Ruby
|
2
|
+
======================
|
3
|
+
|
4
|
+
[Build Status](https://travis-ci.org/SnapSearch/SnapSearch-Client-Ruby)
|
5
|
+
|
6
|
+
SnapSearch Client Ruby is a Ruby-based, framework-agnostic HTTP client library for SnapSearch (https://snapsearch.io/).
|
7
|
+
|
8
|
+
SnapSearch provides similar libraries in other languages: https://github.com/SnapSearch/Snapsearch-Clients
|
9
|
+
|
10
|
+
Installation
|
11
|
+
------------
|
12
|
+
|
13
|
+
Usage
|
14
|
+
-----
|
15
|
+
|
16
|
+
Development
|
17
|
+
---------
|
18
|
+
|
19
|
+
Get the bundler dependency management tool.
|
20
|
+
|
21
|
+
```
|
22
|
+
gem install bundler
|
23
|
+
```
|
24
|
+
|
25
|
+
Install/update all dependencies:
|
26
|
+
|
27
|
+
```
|
28
|
+
bundle install
|
29
|
+
```
|
30
|
+
|
31
|
+
See all build tasks:
|
32
|
+
|
33
|
+
```
|
34
|
+
bundle exec rake -T
|
35
|
+
```
|
36
|
+
|
37
|
+
Make your changes. Release a new version tag with the command below (see also the other `rake version:bump:...` tasks):
|
38
|
+
|
39
|
+
```
|
40
|
+
bundle exec rake version:bump
|
41
|
+
```
|
42
|
+
|
43
|
+
Synchronise and push the tag to GitHub:
|
44
|
+
|
45
|
+
```
|
46
|
+
git push
|
47
|
+
git push --tags
|
48
|
+
```
|
49
|
+
|
50
|
+
Create the gem package:
|
51
|
+
|
52
|
+
```
|
53
|
+
bundle exec rake gem
|
54
|
+
```
|
55
|
+
|
56
|
+
Push the gem to RubyGems:
|
57
|
+
|
58
|
+
```
|
59
|
+
gem push pkg/snapsearch-client-ruby-MAJOR.MINOR.PATCH.gem
|
60
|
+
```
|
61
|
+
|
62
|
+
Setting Up the Detector
|
63
|
+
-----------------------
|
64
|
+
|
65
|
+
The `Detector` class detects if the incoming request is coming from a robot or not.
|
66
|
+
|
67
|
+
Detects if the request came from a search engine robot. It will intercept in cascading order:
|
68
|
+
|
69
|
+
1. on a GET request
|
70
|
+
2. on an HTTP or HTTPS protocol
|
71
|
+
3. not on any ignored robot user agents
|
72
|
+
4. not on any route not matching the whitelist
|
73
|
+
5. not on any route matching the blacklist
|
74
|
+
6. not on any static file that is not a PHP file, if one is detected
|
75
|
+
7. on requests with _escaped_fragment_ query parameter
|
76
|
+
8. on any matched robot user agents
|
77
|
+
|
78
|
+
You can customize a few aspects of this process:
|
79
|
+
|
80
|
+
#### User Agents
|
81
|
+
|
82
|
+
Most robots send a unique `user-agent` HTTP header that we match against to confirm whether it is indeed a request from a robot.
|
83
|
+
We also ignore certain user agents, such as the SnapSearch robot.
|
84
|
+
|
85
|
+
The list of user agents to match and ignore is contained in `resources/robots.json`. You can customize this list through the Detector instance
|
86
|
+
you are working with:
|
87
|
+
|
88
|
+
```
|
89
|
+
# Retrieve the list of user agents to match and ignore:
|
90
|
+
detector.robots # => { 'match' => ['SomeRobot', 'AnotherRobot'], 'ignore' => ['SnapSearch'] }
|
91
|
+
|
92
|
+
# Add a user agent to match against:
|
93
|
+
detector.robots['match'] << 'NewRobot'
|
94
|
+
|
95
|
+
# Add a user agent to ignore:
|
96
|
+
detector.robots['ignore'] << 'MyRobot'
|
97
|
+
|
98
|
+
# Set a new list of user agents to match and ignore:
|
99
|
+
detector.robots = { 'match' => ['WebScraper', 'SillyBot'], 'ignore' => ['MyBotToIgnore'] }
|
100
|
+
|
101
|
+
# Load from a custom JSON file:
|
102
|
+
detector.robots_json = './my_robots.json'
|
103
|
+
detector.robots # => { 'match' => ['MyCustomBot', 'AnotherRobot'], 'ignore' => ['MyLoadedBotFromJSON'] }
|
104
|
+
```
|
105
|
+
|
106
|
+
Tests
|
107
|
+
----
|
108
|
+
|
109
|
+
Tests are written with RSpec. Run tests with `bundle exec rspec spec/`
|
data/Rakefile
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'bundler/setup'
|
2
|
+
require 'pathname'
|
3
|
+
require 'rake/version_task'
|
4
|
+
require 'rubygems/package_task'
|
5
|
+
require 'rspec/core/rake_task'
|
6
|
+
|
7
|
+
gemspec = Pathname.glob( Pathname.new(__FILE__).join('..', '*.gemspec') ).first
|
8
|
+
$spec = Gem::Specification.load( gemspec.to_s )
|
9
|
+
|
10
|
+
Gem::PackageTask.new($spec) do |task|
|
11
|
+
task.need_zip = false
|
12
|
+
end
|
13
|
+
|
14
|
+
Rake::VersionTask.new do |task|
|
15
|
+
task.with_git_tag = true
|
16
|
+
end
|
17
|
+
|
18
|
+
RSpec::Core::RakeTask.new(:spec)
|
19
|
+
|
20
|
+
task :default => :spec
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# Notes to run:
|
2
|
+
# gem install bundler
|
3
|
+
# bundle install
|
4
|
+
# rackup
|
5
|
+
#
|
6
|
+
# Testing:
|
7
|
+
# Visit http://localhost:9292/
|
8
|
+
# Visit http://localhost:9292/?_escaped_fragment_
|
9
|
+
|
10
|
+
require 'bundler/setup'
|
11
|
+
require 'rack/snap_search'
|
12
|
+
|
13
|
+
use Rack::Static, urls: ['/img', '/js', '/css'], root: 'public'
|
14
|
+
|
15
|
+
use Rack::SnapSearch do |config|
|
16
|
+
|
17
|
+
# Required: The email to authenticate with.
|
18
|
+
config.email = 'user@example.com'
|
19
|
+
|
20
|
+
# Required: The key to authenticate with.
|
21
|
+
config.key = 'API_KEY_HERE'
|
22
|
+
|
23
|
+
# Optional: The API URL to send requests to.
|
24
|
+
config.api_url = 'https://snapsearch.io/api/v1/robot' # Default
|
25
|
+
|
26
|
+
# Optional: The CA Cert file to use when sending HTTPS requests to the API.
|
27
|
+
config.ca_cert_file = SnapSearch.root.join('resources', 'cacert.pem') # Default
|
28
|
+
|
29
|
+
# Optional: Check X-Forwarded-Proto because Heroku SSL Support terminates at the load balancer.
|
30
|
+
config.x_forwarded_proto = true # Default
|
31
|
+
|
32
|
+
# Optional: Extra parameters to send to the API.
|
33
|
+
config.parameters = {} # Default
|
34
|
+
|
35
|
+
# Optional: Whitelisted routes. Should be an Array of Regexp instances.
|
36
|
+
config.matched_routes = [] # Default
|
37
|
+
|
38
|
+
# Optional: Blacklisted routes. Should be an Array of Regexp instances.
|
39
|
+
config.ignored_routes = [] # Default
|
40
|
+
|
41
|
+
# Optional: A path of the JSON file containing the user agent whitelist & blacklist.
|
42
|
+
config.robots_json = SnapSearch.root.join('resources', 'robots.json') # Default
|
43
|
+
|
44
|
+
# Optional: A path to the JSON file containing a single Hash with the keys `ignore` and `match`. These keys contain Arrays of Strings (user agents)
|
45
|
+
config.extensions_json = SnapSearch.root.join('resources', 'extensions.json') # Default
|
46
|
+
|
47
|
+
# Optional: Set to `true` to ignore direct requests to files.
|
48
|
+
config.check_static_files = false # Default
|
49
|
+
|
50
|
+
# Optional: A block to run when an exception occurs when making requests to the API.
|
51
|
+
config.on_exception do |exception|
|
52
|
+
p exception
|
53
|
+
end
|
54
|
+
|
55
|
+
# Optional: A block to run before the interception of a bot.
|
56
|
+
config.before_intercept do |url|
|
57
|
+
puts "Before interception\n URL: #{url}"
|
58
|
+
end
|
59
|
+
|
60
|
+
# Optional: A block to run after the interception of a bot.
|
61
|
+
config.after_intercept do |url, response|
|
62
|
+
puts "After interception\n URL: #{url}\n Response: #{response}"
|
63
|
+
end
|
64
|
+
|
65
|
+
# Optional: A block to manipulate the response from the SnapSearch API if a bot is intercepted.
|
66
|
+
config.response_callback do |status, headers, body|
|
67
|
+
puts "Response callback\n Status: #{status}\n Headers: #{headers}\n Body: #{body}"
|
68
|
+
|
69
|
+
[ status, headers, body ]
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
class Application
|
75
|
+
|
76
|
+
def call(env)
|
77
|
+
headers = {
|
78
|
+
'Content-Type' => 'text/html',
|
79
|
+
'Cache-Control' => 'public, max-age=86400'
|
80
|
+
}
|
81
|
+
body = File.read('public/index.html')
|
82
|
+
|
83
|
+
[ 200, headers, [body] ]
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
run Application.new
|
@@ -0,0 +1,15 @@
|
|
1
|
+
<!DOCTYPE html>
|
2
|
+
<html lang='en'>
|
3
|
+
<head>
|
4
|
+
<meta charset='utf-8'>
|
5
|
+
<title>SnapSearch Example</title>
|
6
|
+
<link href="/css/blah.css" media="all" rel="stylesheet" />
|
7
|
+
<!--[if lt IE 9]>
|
8
|
+
<script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
|
9
|
+
<![endif]-->
|
10
|
+
<!-- <script src="/js/blah.js"></script> -->
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
<h1>SnapSearch Example</h1>
|
14
|
+
</body>
|
15
|
+
</html>
|
@@ -0,0 +1,90 @@
|
|
1
|
+
PATH
|
2
|
+
remote: /Users/ryguy/Drive/Code/Ruby/Work/Polycadamy/SnapSearch-Client-Ruby
|
3
|
+
specs:
|
4
|
+
snapsearch-client-ruby (0.0.3)
|
5
|
+
addressable (~> 2.0.0)
|
6
|
+
httpi (~> 2.1.0)
|
7
|
+
version (~> 1.0.0)
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: https://rubygems.org/
|
11
|
+
specs:
|
12
|
+
addressable (2.0.2)
|
13
|
+
celluloid (0.15.2)
|
14
|
+
timers (~> 1.1.0)
|
15
|
+
celluloid-io (0.15.0)
|
16
|
+
celluloid (>= 0.15.0)
|
17
|
+
nio4r (>= 0.5.0)
|
18
|
+
coderay (1.1.0)
|
19
|
+
diff-lcs (1.2.5)
|
20
|
+
ffi (1.9.3)
|
21
|
+
formatador (0.2.4)
|
22
|
+
fuubar (1.3.2)
|
23
|
+
rspec (>= 2.14.0, < 3.1.0)
|
24
|
+
ruby-progressbar (~> 1.3)
|
25
|
+
guard (2.4.0)
|
26
|
+
formatador (>= 0.2.4)
|
27
|
+
listen (~> 2.1)
|
28
|
+
lumberjack (~> 1.0)
|
29
|
+
pry (>= 0.9.12)
|
30
|
+
thor (>= 0.18.1)
|
31
|
+
guard-rspec (4.2.5)
|
32
|
+
guard (~> 2.1)
|
33
|
+
rspec (>= 2.14, < 4.0)
|
34
|
+
guard-yard (2.1.0)
|
35
|
+
guard (>= 1.1.0)
|
36
|
+
yard (>= 0.7.0)
|
37
|
+
httpi (2.1.0)
|
38
|
+
rack
|
39
|
+
rubyntlm (~> 0.3.2)
|
40
|
+
listen (2.5.0)
|
41
|
+
celluloid (>= 0.15.2)
|
42
|
+
celluloid-io (>= 0.15.0)
|
43
|
+
rb-fsevent (>= 0.9.3)
|
44
|
+
rb-inotify (>= 0.9)
|
45
|
+
lumberjack (1.0.4)
|
46
|
+
method_source (0.8.2)
|
47
|
+
nio4r (1.0.0)
|
48
|
+
pry (0.9.12.6)
|
49
|
+
coderay (~> 1.0)
|
50
|
+
method_source (~> 0.8)
|
51
|
+
slop (~> 3.4)
|
52
|
+
rack (1.5.2)
|
53
|
+
rack-protection (1.5.2)
|
54
|
+
rack
|
55
|
+
rake (10.1.1)
|
56
|
+
rb-fsevent (0.9.4)
|
57
|
+
rb-inotify (0.9.3)
|
58
|
+
ffi (>= 0.5.0)
|
59
|
+
rspec (2.14.1)
|
60
|
+
rspec-core (~> 2.14.0)
|
61
|
+
rspec-expectations (~> 2.14.0)
|
62
|
+
rspec-mocks (~> 2.14.0)
|
63
|
+
rspec-core (2.14.7)
|
64
|
+
rspec-expectations (2.14.5)
|
65
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
66
|
+
rspec-mocks (2.14.5)
|
67
|
+
ruby-progressbar (1.4.1)
|
68
|
+
rubyntlm (0.3.4)
|
69
|
+
sinatra (1.4.4)
|
70
|
+
rack (~> 1.4)
|
71
|
+
rack-protection (~> 1.4)
|
72
|
+
tilt (~> 1.3, >= 1.3.4)
|
73
|
+
slop (3.4.7)
|
74
|
+
thor (0.18.1)
|
75
|
+
tilt (1.4.1)
|
76
|
+
timers (1.1.0)
|
77
|
+
version (1.0.0)
|
78
|
+
yard (0.8.7.3)
|
79
|
+
|
80
|
+
PLATFORMS
|
81
|
+
ruby
|
82
|
+
|
83
|
+
DEPENDENCIES
|
84
|
+
fuubar (~> 1.3.2)
|
85
|
+
guard-rspec (~> 4.2.5)
|
86
|
+
guard-yard (~> 2.1.0)
|
87
|
+
rake (~> 10.1.1)
|
88
|
+
rspec (~> 2.14.1)
|
89
|
+
sinatra
|
90
|
+
snapsearch-client-ruby!
|