snapsearch-client-ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: f2959c7bd59e817f2eef37602a4abdc8ae1f1be8
4
+ data.tar.gz: 3ddaec20074f02943df30adf39de5e0ad60b0ccd
5
+ SHA512:
6
+ metadata.gz: 1482fc4b66a72119cb2e26637429d3c48b9561e65ebcb4029fd1b173a60d0b5491aeb23719e7f0666298491e338708c3110ecdee1b0fbbe053b6ff529b520ad6
7
+ data.tar.gz: 1b847ec7a8e27533a0cde5f39bf0445ac923b3232dd55f85a1676814a73b8ba9dcb2637c48aed327c4cc84474b2b815274572de2ef81f56d5e2b1b9ab64a6387
data/Gemfile ADDED
@@ -0,0 +1,9 @@
1
+ source 'https://rubygems.org'
2
+
3
+ platforms :rbx do
4
+ gem 'racc'
5
+ gem 'rubysl', '~> 2.0'
6
+ gem 'psych'
7
+ end
8
+
9
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,83 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ snapsearch-client-ruby (0.1.0)
5
+ addressable (~> 2.0.0)
6
+ httpi (~> 2.1.0)
7
+ rack (~> 1.5.0)
8
+ version (~> 1.0.0)
9
+
10
+ GEM
11
+ remote: https://rubygems.org/
12
+ specs:
13
+ addressable (2.0.2)
14
+ celluloid (0.15.2)
15
+ timers (~> 1.1.0)
16
+ coderay (1.1.0)
17
+ diff-lcs (1.2.5)
18
+ ffi (1.9.3-x86-mingw32)
19
+ formatador (0.2.4)
20
+ fuubar (1.3.2)
21
+ rspec (>= 2.14.0, < 3.1.0)
22
+ ruby-progressbar (~> 1.3)
23
+ guard (2.4.0)
24
+ formatador (>= 0.2.4)
25
+ listen (~> 2.1)
26
+ lumberjack (~> 1.0)
27
+ pry (>= 0.9.12)
28
+ thor (>= 0.18.1)
29
+ guard-rspec (4.2.5)
30
+ guard (~> 2.1)
31
+ rspec (>= 2.14, < 4.0)
32
+ guard-yard (2.1.0)
33
+ guard (>= 1.1.0)
34
+ yard (>= 0.7.0)
35
+ httpi (2.1.0)
36
+ rack
37
+ rubyntlm (~> 0.3.2)
38
+ listen (2.4.0)
39
+ celluloid (>= 0.15.2)
40
+ rb-fsevent (>= 0.9.3)
41
+ rb-inotify (>= 0.9)
42
+ lumberjack (1.0.4)
43
+ method_source (0.8.2)
44
+ pry (0.9.12.6-x86-mingw32)
45
+ coderay (~> 1.0)
46
+ method_source (~> 0.8)
47
+ slop (~> 3.4)
48
+ win32console (~> 1.3)
49
+ rack (1.5.2)
50
+ rake (10.1.1)
51
+ rb-fsevent (0.9.4)
52
+ rb-inotify (0.9.3)
53
+ ffi (>= 0.5.0)
54
+ rspec (2.14.1)
55
+ rspec-core (~> 2.14.0)
56
+ rspec-expectations (~> 2.14.0)
57
+ rspec-mocks (~> 2.14.0)
58
+ rspec-core (2.14.7)
59
+ rspec-expectations (2.14.5)
60
+ diff-lcs (>= 1.1.3, < 2.0)
61
+ rspec-mocks (2.14.5)
62
+ ruby-progressbar (1.4.1)
63
+ rubyntlm (0.3.4)
64
+ slop (3.4.7)
65
+ thor (0.18.1)
66
+ timers (1.1.0)
67
+ version (1.0.0)
68
+ win32console (1.3.2-x86-mingw32)
69
+ yard (0.8.7.3)
70
+
71
+ PLATFORMS
72
+ x86-mingw32
73
+
74
+ DEPENDENCIES
75
+ fuubar (~> 1.3.2)
76
+ guard-rspec (~> 4.2.5)
77
+ guard-yard (~> 2.1.0)
78
+ psych
79
+ racc
80
+ rake (~> 10.1.1)
81
+ rspec (~> 2.14.1)
82
+ rubysl (~> 2.0)
83
+ snapsearch-client-ruby!
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2014 SnapSearch
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,109 @@
1
+ SnapSearch-Client-Ruby
2
+ ======================
3
+
4
+ [![Build Status](https://travis-ci.org/SnapSearch/SnapSearch-Client-Ruby.png?branch=master)](https://travis-ci.org/SnapSearch/SnapSearch-Client-Ruby)
5
+
6
+ SnapSearch Client Ruby is a Ruby-based, framework-agnostic HTTP client library for SnapSearch (https://snapsearch.io/).
7
+
8
+ SnapSearch provides similar libraries in other languages: https://github.com/SnapSearch/Snapsearch-Clients
9
+
10
+ Installation
11
+ ------------
12
+
13
+ Usage
14
+ -----
15
+
16
+ Development
17
+ ---------
18
+
19
+ Get the bundler dependency management tool.
20
+
21
+ ```
22
+ gem install bundler
23
+ ```
24
+
25
+ Install/update all dependencies:
26
+
27
+ ```
28
+ bundle install
29
+ ```
30
+
31
+ See all build tasks:
32
+
33
+ ```
34
+ bundle exec rake -T
35
+ ```
36
+
37
+ Make your changes, then release a new version tag (see also the other `rake version:bump:...` tasks):
38
+
39
+ ```
40
+ bundle exec rake version:bump
41
+ ```
42
+
43
+ Synchronise and push the tag to GitHub:
44
+
45
+ ```
46
+ git push
47
+ git push --tags
48
+ ```
49
+
50
+ Create the gem package:
51
+
52
+ ```
53
+ bundle exec rake gem
54
+ ```
55
+
56
+ Push the gem to RubyGems:
57
+
58
+ ```
59
+ gem push pkg/snapsearch-client-ruby-MAJOR.MINOR.PATCH.gem
60
+ ```
61
+
62
+ Setting Up the Detector
63
+ -----------------------
64
+
65
+ The `Detector` class detects if the incoming request is coming from a robot or not.
66
+
67
+ Detects if the request came from a search engine robot. It will intercept in cascading order:
68
+
69
+ 1. on a GET request
70
+ 2. on an HTTP or HTTPS protocol
71
+ 3. not on any ignored robot user agents
72
+ 4. not on any route not matching the whitelist
73
+ 5. not on any route matching the blacklist
74
+ 6. not on any static file that is not a PHP file, if one is detected
75
+ 7. on requests with the `_escaped_fragment_` query parameter
76
+ 8. on any matched robot user agents
77
+
78
+ You can customize a few aspects of this process:
79
+
80
+ #### User Agents
81
+
82
+ Most robots send a unique `user-agent` HTTP header that we match against to confirm whether it is indeed a request from a robot.
83
+ We also ignore certain user agents, such as the SnapSearch robot.
84
+
85
+ The list of user agents to match and ignore is contained in `resources/robots.json`. You can customize this list through the Detector instance
86
+ you are working with:
87
+
88
+ ```
89
+ # Retrieve the list of user agents to match and ignore:
90
+ detector.robots # => { 'match' => ['SomeRobot', 'AnotherRobot'], 'ignore' => ['SnapSearch'] }
91
+
92
+ # Add a user agent to match against:
93
+ detector.robots['match'] << 'NewRobot'
94
+
95
+ # Add a user agent to ignore:
96
+ detector.robots['ignore'] << 'MyRobot'
97
+
98
+ # Set a new list of user agents to match and ignore:
99
+ detector.robots = { 'match' => ['WebScraper', 'SillyBot'], 'ignore' => ['MyBotToIgnore'] }
100
+
101
+ # Load from a custom JSON file:
102
+ detector.robots_json = './my_robots.json'
103
+ detector.robots # => { 'match' => ['MyCustomBot', 'AnotherRobot'], 'ignore' => ['MyLoadedBotFromJSON'] }
104
+ ```
105
+
106
+ Tests
107
+ ----
108
+
109
+ Tests are written with RSpec. Run tests with `bundle exec rspec spec/`
data/Rakefile ADDED
@@ -0,0 +1,20 @@
1
+ require 'bundler/setup'
2
+ require 'pathname'
3
+ require 'rake/version_task'
4
+ require 'rubygems/package_task'
5
+ require 'rspec/core/rake_task'
6
+
7
+ gemspec = Pathname.glob( Pathname.new(__FILE__).join('..', '*.gemspec') ).first
8
+ $spec = Gem::Specification.load( gemspec.to_s )
9
+
10
+ Gem::PackageTask.new($spec) do |task|
11
+ task.need_zip = false
12
+ end
13
+
14
+ Rake::VersionTask.new do |task|
15
+ task.with_git_tag = true
16
+ end
17
+
18
+ RSpec::Core::RakeTask.new(:spec)
19
+
20
+ task :default => :spec
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'rack'
4
+
5
+ gemspec :path => '../..'
@@ -0,0 +1,88 @@
1
+ # Notes to run:
2
+ # gem install bundler
3
+ # bundle install
4
+ # rackup
5
+ #
6
+ # Testing:
7
+ # Visit http://localhost:9292/
8
+ # Visit http://localhost:9292/?_escaped_fragment_
9
+
10
+ require 'bundler/setup'
11
+ require 'rack/snap_search'
12
+
13
+ use Rack::Static, urls: ['/img', '/js', '/css'], root: 'public'
14
+
15
+ use Rack::SnapSearch do |config|
16
+
17
+ # Required: The email to authenticate with.
18
+ config.email = 'user@example.com'
19
+
20
+ # Required: The key to authenticate with.
21
+ config.key = 'API_KEY_HERE'
22
+
23
+ # Optional: The API URL to send requests to.
24
+ config.api_url = 'https://snapsearch.io/api/v1/robot' # Default
25
+
26
+ # Optional: The CA Cert file to use when sending HTTPS requests to the API.
27
+ config.ca_cert_file = SnapSearch.root.join('resources', 'cacert.pem') # Default
28
+
29
+ # Optional: Check X-Forwarded-Proto because Heroku SSL Support terminates at the load balancer.
30
+ config.x_forwarded_proto = true # Default
31
+
32
+ # Optional: Extra parameters to send to the API.
33
+ config.parameters = {} # Default
34
+
35
+ # Optional: Whitelisted routes. Should be an Array of Regexp instances.
36
+ config.matched_routes = [] # Default
37
+
38
+ # Optional: Blacklisted routes. Should be an Array of Regexp instances.
39
+ config.ignored_routes = [] # Default
40
+
41
+ # Optional: The path to the JSON file containing the user agent whitelist & blacklist.
42
+ config.robots_json = SnapSearch.root.join('resources', 'robots.json') # Default
43
+
44
+ # Optional: A path to the JSON file containing a single Hash with the keys `ignore` and `match`. These keys contain Arrays of Strings (user agents)
45
+ config.extensions_json = SnapSearch.root.join('resources', 'extensions.json') # Default
46
+
47
+ # Optional: Set to `true` to ignore direct requests to files.
48
+ config.check_static_files = false # Default
49
+
50
+ # Optional: A block to run when an exception occurs when making requests to the API.
51
+ config.on_exception do |exception|
52
+ p exception
53
+ end
54
+
55
+ # Optional: A block to run before the interception of a bot.
56
+ config.before_intercept do |url|
57
+ puts "Before interception\n URL: #{url}"
58
+ end
59
+
60
+ # Optional: A block to run after the interception of a bot.
61
+ config.after_intercept do |url, response|
62
+ puts "After interception\n URL: #{url}\n Response: #{response}"
63
+ end
64
+
65
+ # Optional: A block to manipulate the response from the SnapSearch API if a bot is intercepted.
66
+ config.response_callback do |status, headers, body|
67
+ puts "Response callback\n Status: #{status}\n Headers: #{headers}\n Body: #{body}"
68
+
69
+ [ status, headers, body ]
70
+ end
71
+
72
+ end
73
+
74
+ class Application
75
+
76
+ def call(env)
77
+ headers = {
78
+ 'Content-Type' => 'text/html',
79
+ 'Cache-Control' => 'public, max-age=86400'
80
+ }
81
+ body = File.read('public/index.html')
82
+
83
+ [ 200, headers, [body] ]
84
+ end
85
+
86
+ end
87
+
88
+ run Application.new
@@ -0,0 +1,15 @@
1
+ <!DOCTYPE html>
2
+ <html lang='en'>
3
+ <head>
4
+ <meta charset='utf-8'>
5
+ <title>SnapSearch Example</title>
6
+ <link href="/css/blah.css" media="all" rel="stylesheet" />
7
+ <!--[if lt IE 9]>
8
+ <script src="http://html5shim.googlecode.com/svn/trunk/html5.js"></script>
9
+ <![endif]-->
10
+ <!-- <script src="/js/blah.js"></script> -->
11
+ </head>
12
+ <body>
13
+ <h1>SnapSearch Example</h1>
14
+ </body>
15
+ </html>
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gem 'sinatra'
4
+
5
+ gemspec :path => '../..'
@@ -0,0 +1,90 @@
1
+ PATH
2
+ remote: /Users/ryguy/Drive/Code/Ruby/Work/Polycadamy/SnapSearch-Client-Ruby
3
+ specs:
4
+ snapsearch-client-ruby (0.0.3)
5
+ addressable (~> 2.0.0)
6
+ httpi (~> 2.1.0)
7
+ version (~> 1.0.0)
8
+
9
+ GEM
10
+ remote: https://rubygems.org/
11
+ specs:
12
+ addressable (2.0.2)
13
+ celluloid (0.15.2)
14
+ timers (~> 1.1.0)
15
+ celluloid-io (0.15.0)
16
+ celluloid (>= 0.15.0)
17
+ nio4r (>= 0.5.0)
18
+ coderay (1.1.0)
19
+ diff-lcs (1.2.5)
20
+ ffi (1.9.3)
21
+ formatador (0.2.4)
22
+ fuubar (1.3.2)
23
+ rspec (>= 2.14.0, < 3.1.0)
24
+ ruby-progressbar (~> 1.3)
25
+ guard (2.4.0)
26
+ formatador (>= 0.2.4)
27
+ listen (~> 2.1)
28
+ lumberjack (~> 1.0)
29
+ pry (>= 0.9.12)
30
+ thor (>= 0.18.1)
31
+ guard-rspec (4.2.5)
32
+ guard (~> 2.1)
33
+ rspec (>= 2.14, < 4.0)
34
+ guard-yard (2.1.0)
35
+ guard (>= 1.1.0)
36
+ yard (>= 0.7.0)
37
+ httpi (2.1.0)
38
+ rack
39
+ rubyntlm (~> 0.3.2)
40
+ listen (2.5.0)
41
+ celluloid (>= 0.15.2)
42
+ celluloid-io (>= 0.15.0)
43
+ rb-fsevent (>= 0.9.3)
44
+ rb-inotify (>= 0.9)
45
+ lumberjack (1.0.4)
46
+ method_source (0.8.2)
47
+ nio4r (1.0.0)
48
+ pry (0.9.12.6)
49
+ coderay (~> 1.0)
50
+ method_source (~> 0.8)
51
+ slop (~> 3.4)
52
+ rack (1.5.2)
53
+ rack-protection (1.5.2)
54
+ rack
55
+ rake (10.1.1)
56
+ rb-fsevent (0.9.4)
57
+ rb-inotify (0.9.3)
58
+ ffi (>= 0.5.0)
59
+ rspec (2.14.1)
60
+ rspec-core (~> 2.14.0)
61
+ rspec-expectations (~> 2.14.0)
62
+ rspec-mocks (~> 2.14.0)
63
+ rspec-core (2.14.7)
64
+ rspec-expectations (2.14.5)
65
+ diff-lcs (>= 1.1.3, < 2.0)
66
+ rspec-mocks (2.14.5)
67
+ ruby-progressbar (1.4.1)
68
+ rubyntlm (0.3.4)
69
+ sinatra (1.4.4)
70
+ rack (~> 1.4)
71
+ rack-protection (~> 1.4)
72
+ tilt (~> 1.3, >= 1.3.4)
73
+ slop (3.4.7)
74
+ thor (0.18.1)
75
+ tilt (1.4.1)
76
+ timers (1.1.0)
77
+ version (1.0.0)
78
+ yard (0.8.7.3)
79
+
80
+ PLATFORMS
81
+ ruby
82
+
83
+ DEPENDENCIES
84
+ fuubar (~> 1.3.2)
85
+ guard-rspec (~> 4.2.5)
86
+ guard-yard (~> 2.1.0)
87
+ rake (~> 10.1.1)
88
+ rspec (~> 2.14.1)
89
+ sinatra
90
+ snapsearch-client-ruby!