adsense_crawler_for_private 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README.md +112 -0
- data/Rakefile +39 -0
- data/app/controllers/adsense_crawler_for_private/adsense_crawler_login_controller.rb +39 -0
- data/app/controllers/adsense_crawler_for_private/application_controller.rb +6 -0
- data/app/helpers/adsense_crawler_for_private/application_helper.rb +4 -0
- data/config/routes.rb +3 -0
- data/lib/adsense_crawler_for_private/engine.rb +6 -0
- data/lib/adsense_crawler_for_private/version.rb +3 -0
- data/lib/adsense_crawler_for_private.rb +84 -0
- data/lib/tasks/adsense_crawler_for_private_tasks.rake +4 -0
- data/test/adsense_crawler_for_private_test.rb +7 -0
- data/test/dummy/Rakefile +7 -0
- data/test/dummy/app/controllers/application_controller.rb +3 -0
- data/test/dummy/app/controllers/main_controller.rb +29 -0
- data/test/dummy/app/helpers/application_helper.rb +2 -0
- data/test/dummy/config/application.rb +50 -0
- data/test/dummy/config/boot.rb +10 -0
- data/test/dummy/config/database.yml +25 -0
- data/test/dummy/config/environment.rb +5 -0
- data/test/dummy/config/environments/development.rb +30 -0
- data/test/dummy/config/environments/production.rb +60 -0
- data/test/dummy/config/environments/test.rb +39 -0
- data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
- data/test/dummy/config/initializers/inflections.rb +10 -0
- data/test/dummy/config/initializers/mime_types.rb +5 -0
- data/test/dummy/config/initializers/secret_token.rb +7 -0
- data/test/dummy/config/initializers/session_store.rb +8 -0
- data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
- data/test/dummy/config/locales/en.yml +5 -0
- data/test/dummy/config/routes.rb +6 -0
- data/test/dummy/config.ru +4 -0
- data/test/dummy/db/development.sqlite3 +0 -0
- data/test/dummy/db/test.sqlite3 +0 -0
- data/test/dummy/log/development.log +436 -0
- data/test/dummy/log/test.log +6831 -0
- data/test/dummy/public/404.html +26 -0
- data/test/dummy/public/422.html +26 -0
- data/test/dummy/public/500.html +26 -0
- data/test/dummy/public/favicon.ico +0 -0
- data/test/dummy/script/rails +6 -0
- data/test/integration/adsense_crawler_for_private/filtering_helper_test.rb +98 -0
- data/test/integration/adsense_crawler_for_private/login_test.rb +77 -0
- data/test/test_helper.rb +10 -0
- metadata +187 -0
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright 2011 YOURNAME
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
# AdsenseCrawlerForPrivate
|
2
|
+
|
3
|
+
An easy way to let the AdSense crawler log in and see private or custom pages. Basically a single custom login filter.
|
4
|
+
|
5
|
+
[<img src="https://secure.travis-ci.org/holli/adsense_crawler_for_private.png" />](http://travis-ci.org/holli/adsense_crawler_for_private)
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
|
9
|
+
|
10
|
+
**Initialize:** In RAILS_ROOT/config/initializers/adsense_crawler_for_private.rb
|
11
|
+
|
12
|
+
```
|
13
|
+
# Configure adsense_crawler_for_private
|
14
|
+
|
15
|
+
AdsenseCrawlerForPrivate.cookie_name = "adsense_crawler"
|
16
|
+
AdsenseCrawlerForPrivate.cookie_domain = :all
|
17
|
+
AdsenseCrawlerForPrivate.crawler_name = "test_name"
|
18
|
+
AdsenseCrawlerForPrivate.crawler_password = "test_password"
|
19
|
+
|
20
|
+
# If you are paranoid you can restrict crawler access to specific ip addresses
|
21
|
+
# AdsenseCrawlerForPrivate.ip_ranges = [IPAddr.new("127.0.0.1"), IPAddr.new("192.168.0.1/20")]
|
22
|
+
|
23
|
+
```
|
24
|
+
|
25
|
+
**Routes:**
|
26
|
+
|
27
|
+
```
|
28
|
+
# Give url "http://domain.dom/adsense_crawler_for_private/login" for crawlers to log in
|
29
|
+
# or directly "http://domain.dom/adsense_crawler_for_private/login?name=test_name&password=test_password"
|
30
|
+
mount AdsenseCrawlerForPrivate::Engine => "/adsense_crawler_for_private"
|
31
|
+
```
|
32
|
+
|
33
|
+
**Rendering etc usage:** in controller define what to render for crawlers
|
34
|
+
|
35
|
+
```
|
36
|
+
|
37
|
+
class SomeController < ApplicationController
|
38
|
+
|
39
|
+
before_filter :adsense_crawler_private_specific_page # normal authentication filters after this one
|
40
|
+
|
41
|
+
def adsense_crawler_private_specific_page
|
42
|
+
if AdsenseCrawlerForPrivate.login_check(cookies, request)
|
43
|
+
# here info how to render page for crawler
|
44
|
+
# e.g render 'crawler_ad_page'
|
45
|
+
# or creating a dummy login info
|
46
|
+
return false # so that normal authentication filters etc are not effective
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# Here would be rest of the controller
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
```
|
55
|
+
|
56
|
+
Or if you want only to check alongside other authentication you can call in your
|
57
|
+
own authentication filters the AdsenseCrawlerForPrivate.login_check(cookies, request) method
|
58
|
+
directly.
|
59
|
+
|
60
|
+
|
61
|
+
### Testing your own stuff
|
62
|
+
|
63
|
+
You can test your filters by setting the cookie the same way AdsenseCrawlerLoginController#login does.
|
64
|
+
|
65
|
+
If you have enabled ip_ranges option you have to make sure that the ip that tests use is enabled for crawlers.
|
66
|
+
|
67
|
+
```
|
68
|
+
test "here would be a test for logged crawler in functional tests" do
|
69
|
+
# Dummy login for crawler, These should be configured in initializers
|
70
|
+
crawler_name="adsense_crawler"; crawler_password = "adsense_pass";
|
71
|
+
@request.remote_addr = ip = "127.0.0.1"
|
72
|
+
|
73
|
+
# In some frameworks cookies.signed would be enough. Some will need you to sign the cookie by yourself.
|
74
|
+
# If you have a better way, let me know
|
75
|
+
# same as cookies.signed[AdsenseCrawlerForPrivate.cookie_name] = AdsenseCrawlerForPrivate.cookie_hash(crawler_name, crawler_password, ip)
|
76
|
+
@request.cookies[AdsenseCrawlerForPrivate.cookie_name] =
|
77
|
+
ActiveSupport::MessageVerifier.new(Dummy::Application.config.secret_token).generate(
|
78
|
+
AdsenseCrawlerForPrivate.cookie_str(crawler_name, crawler_password, 2.days.from_now, ip))
|
79
|
+
|
80
|
+
|
81
|
+
#Normal test in here
|
82
|
+
get :index
|
83
|
+
assert_response :success
|
84
|
+
assert response.body.include?("Hi normal crawler")
|
85
|
+
end
|
86
|
+
|
87
|
+
test "here would be a test for non-logged crawler" do
|
88
|
+
get :index
|
89
|
+
assert_response :success
|
90
|
+
assert response.body.include?("Hi normal user")
|
91
|
+
end
|
92
|
+
```
|
93
|
+
|
94
|
+
### robots.txt
|
95
|
+
|
96
|
+
Remember to update robots.txt if you have previously forbidden adsense to crawl certain pages.
|
97
|
+
|
98
|
+
## Requirements
|
99
|
+
|
100
|
+
Gem has been tested with ruby 1.8.7, 1.9.2 and Rails 3.1.
|
101
|
+
|
102
|
+
[<img src="https://secure.travis-ci.org/holli/adsense_crawler_for_private.png" />](http://travis-ci.org/holli/adsense_crawler_for_private)
|
103
|
+
|
104
|
+
http://travis-ci.org/#!/holli/adsense_crawler_for_private
|
105
|
+
|
106
|
+
## Support
|
107
|
+
|
108
|
+
Submit suggestions or feature requests as a GitHub Issue or Pull Request. Remember to update tests. Tests are quite extensive.
|
109
|
+
|
110
|
+
## License
|
111
|
+
|
112
|
+
Released under the MIT license (http://www.opensource.org/licenses/mit-license.php)
|
data/Rakefile
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
begin
|
3
|
+
require 'bundler/setup'
|
4
|
+
rescue LoadError
|
5
|
+
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
6
|
+
end
|
7
|
+
begin
|
8
|
+
require 'rdoc/task'
|
9
|
+
rescue LoadError
|
10
|
+
require 'rdoc/rdoc'
|
11
|
+
require 'rake/rdoctask'
|
12
|
+
RDoc::Task = Rake::RDocTask
|
13
|
+
end
|
14
|
+
|
15
|
+
RDoc::Task.new(:rdoc) do |rdoc|
|
16
|
+
rdoc.rdoc_dir = 'rdoc'
|
17
|
+
rdoc.title = 'AdsenseCrawlerForPrivate'
|
18
|
+
rdoc.options << '--line-numbers'
|
19
|
+
rdoc.rdoc_files.include('README.rdoc')
|
20
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
21
|
+
end
|
22
|
+
|
23
|
+
APP_RAKEFILE = File.expand_path("../test/dummy/Rakefile", __FILE__)
|
24
|
+
load 'rails/tasks/engine.rake'
|
25
|
+
|
26
|
+
|
27
|
+
Bundler::GemHelper.install_tasks
|
28
|
+
|
29
|
+
require 'rake/testtask'
|
30
|
+
|
31
|
+
Rake::TestTask.new(:test) do |t|
|
32
|
+
t.libs << 'lib'
|
33
|
+
t.libs << 'test'
|
34
|
+
t.pattern = 'test/**/*_test.rb'
|
35
|
+
t.verbose = false
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
task :default => :test
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module AdsenseCrawlerForPrivate
  # Login endpoint for the crawler: verifies the submitted name/password and
  # sets the signed cookie that AdsenseCrawlerForPrivate.login_check reads.
  class AdsenseCrawlerLoginController < ApplicationController

    # Making sure that verify_authenticity_token is not on, adsense does not have it
    skip_before_filter :verify_authenticity_token, :only => :login

    # Compares params[:name] and params[:password] with the configured
    # crawler credentials and runs AdsenseCrawlerForPrivate.ip_check.
    #
    # Renders:
    #   200 'crawler login ok'           - credentials and ip accepted, cookie set
    #   401 'crawler login unsuccessful' - check failed, crawler cookie deleted
    #   401 (not configured message)     - no crawler_password has been configured
    def login
      if AdsenseCrawlerForPrivate.crawler_password.blank?
        str = "AdsenseCrawlerForPrivate not configured, no password given."
        AdsenseCrawlerForPrivate.logger.warn(str)
        render :text => str, :status => 401
      else
        crawler_name = params[:name]
        crawler_password = params[:password]

        if (AdsenseCrawlerForPrivate.ip_check(request) &&
            crawler_name == AdsenseCrawlerForPrivate.crawler_name &&
            crawler_password == AdsenseCrawlerForPrivate.crawler_password)

          cookies.signed[AdsenseCrawlerForPrivate.cookie_name] =
            AdsenseCrawlerForPrivate.cookie_hash(crawler_name, crawler_password, request.remote_addr)

          AdsenseCrawlerForPrivate.logger.warn "login successfully. Crawler_name: #{crawler_name}"

          render :text => 'crawler login ok', :status => 200
        else
          cookies.delete(AdsenseCrawlerForPrivate.cookie_name, :domain => AdsenseCrawlerForPrivate.cookie_domain)

          # The submitted password is deliberately kept out of the log: a
          # near-miss attempt would otherwise leave the real secret (or a
          # typo of it) readable in plain text in the log file.
          AdsenseCrawlerForPrivate.logger.warn "login unsuccessful. Crawler_name: #{crawler_name}, crawler_ip: #{request.remote_addr}"

          render :text => 'crawler login unsuccessful', :status => 401 # 401 unauthorized
        end
      end
    end

  end
end
|
data/config/routes.rb
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'ipaddr'
|
2
|
+
require "adsense_crawler_for_private/engine"
|
3
|
+
|
4
|
+
# Configuration and helper methods for letting a crawler authenticate with a
# signed cookie. The cookie value is a JSON array of
# [crawler name, SHA1 hex digest of the password, expiry (httpdate), ip].
module AdsenseCrawlerForPrivate

  mattr_accessor :cookie_name, :cookie_domain, :crawler_name, :crawler_password, :ip_ranges

  protected :crawler_password

  # Checks crawler cookie, returns true if logged in.
  #
  # cookies - the controller's cookie jar (must respond to #signed and #delete)
  # request - the current request (must respond to #remote_addr)
  #
  # Returns false when no cookie is present. When a cookie is present but does
  # not pass every check (name, password digest, expiry, ip), the cookie is
  # deleted and false is returned.
  def self.login_check(cookies, request)
    cookie = cookies.signed[AdsenseCrawlerForPrivate.cookie_name]
    return false if cookie.blank?

    login_ok = false
    # The cookie content includes the password digest, so it is not logged.
    self.logger.info "login_check: cookie found"

    begin
      name, password, expiry_time, remote_addr = JSON.parse(cookie)
      expiry_time = Time.parse(expiry_time)

      if (name == AdsenseCrawlerForPrivate.crawler_name &&
          password == Digest::SHA1.hexdigest(AdsenseCrawlerForPrivate.crawler_password) &&
          expiry_time > Time.now &&
          request.remote_addr == remote_addr &&
          self.ip_check(request))
        login_ok = true
        self.logger.warn "login_check was ok for #{name}"
      end
    rescue JSON::ParserError, ArgumentError, TypeError => e
      # ArgumentError / TypeError: the JSON was valid but the expiry field is
      # missing or not a parseable time string. Treat it as a failed login
      # instead of letting the exception escape the caller's filter.
      self.logger.warn "login_check problem parsing cookie json: #{e.inspect}"
    ensure
      unless login_ok
        self.logger.warn "login_check wasn't ok, even though cookie was found."
        # The cookie is set with :domain (see cookie_hash), so the same
        # domain has to be given here for the deletion to take effect.
        cookies.delete(AdsenseCrawlerForPrivate.cookie_name,
                       :domain => AdsenseCrawlerForPrivate.cookie_domain)
      end
    end

    login_ok
  end

  # Builds the option hash assigned to cookies.signed[cookie_name]:
  # the JSON value from cookie_str, a two day expiry and the configured domain.
  def self.cookie_hash(crawler_name, crawler_password, request_or_ip)
    # Computed once so the expiry inside the value matches the cookie's own.
    expires = 2.days.from_now

    {:value => AdsenseCrawlerForPrivate.cookie_str(crawler_name, crawler_password, expires, request_or_ip),
     :expires => expires,
     :domain => AdsenseCrawlerForPrivate.cookie_domain}
  end

  # Returns the JSON string stored in the cookie.
  # request_or_ip may be a request (anything responding to #remote_addr) or
  # an ip given directly; the latter is converted with #to_s.
  def self.cookie_str(crawler_name, crawler_password, expire_time, request_or_ip)
    ip_str = request_or_ip.respond_to?(:remote_addr) ? request_or_ip.remote_addr : request_or_ip.to_s

    [crawler_name, Digest::SHA1.hexdigest(crawler_password),
     expire_time.httpdate, ip_str].to_json
  end

  # Returns true when ip_ranges is not configured (nil) or when one of the
  # configured ranges includes request.remote_addr; false otherwise.
  def self.ip_check(request)
    return true if AdsenseCrawlerForPrivate.ip_ranges.nil?

    remote_ip = ::IPAddr.new(request.remote_addr)
    AdsenseCrawlerForPrivate.ip_ranges.any? { |ip_accepted| ip_accepted.include?(remote_ip) }
  end

  # Logger facade used by this gem; see the nested Logger class.
  def self.logger
    Logger
  end

  # Forwards to Rails.logger, prefixing every message with the gem name.
  class Logger
    def self.info(str)
      Rails.logger.info("AdsenseCrawlerForPrivate: #{str}")
    end

    def self.warn(str)
      Rails.logger.warn("AdsenseCrawlerForPrivate: #{str}")
    end
  end
end
|
80
|
+
|
81
|
+
#class ApplicationController < ActionController::Base
|
82
|
+
# helper MyEngine::SharedEngineHelper
|
83
|
+
# end
|
84
|
+
#end
|
data/test/dummy/Rakefile
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
#!/usr/bin/env rake
|
2
|
+
# Add your own tasks in files placed in lib/tasks ending in .rake,
|
3
|
+
# for example lib/tasks/capistrano.rake, and they will automatically be available to Rake.
|
4
|
+
|
5
|
+
require File.expand_path('../config/application', __FILE__)
|
6
|
+
|
7
|
+
Dummy::Application.load_tasks
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Controller of the dummy test application. forbidden_render is guarded by
# two filters: the crawler filter first, then a filter that always redirects.
class MainController < ApplicationController

  before_filter :adsense_crawler_private_specific_page,
                :redirect_if_not_logged,
                :only => :forbidden_render

  # Unprotected action.
  def normal_render
    render :text => 'this is rendered normally'
  end

  # Both filters above render or redirect before this body is reached.
  def forbidden_render
    render :text => 'never used'
  end

  protected

  # Stand-in for a normal authentication filter: always redirects.
  def redirect_if_not_logged
    redirect_to "http://www.you-were-not-logged-in.inv"
  end

  # Renders the crawler-specific page when the crawler cookie is valid.
  def adsense_crawler_private_specific_page
    return unless AdsenseCrawlerForPrivate.login_check(cookies, request)

    render :text => "private render for crawler"
    false
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path('../boot', __FILE__)
|
2
|
+
|
3
|
+
# This is a custom way of disable activerecord database, used instead of default "require 'rails/all'"
|
4
|
+
# see http://stackoverflow.com/questions/2212709/remove-activerecord-in-rails-3-beta for more info
|
5
|
+
require "action_controller/railtie"
|
6
|
+
require "action_mailer/railtie"
|
7
|
+
require "active_resource/railtie"
|
8
|
+
require "sprockets/railtie"
|
9
|
+
|
10
|
+
Bundler.require
|
11
|
+
require "adsense_crawler_for_private"
|
12
|
+
|
13
|
+
module Dummy
  # Rails application used only by the gem's test suite.
  class Application < Rails::Application
    # Settings in config/environments/* take precedence over those specified here.
    # Application configuration should go into files in config/initializers
    # -- all .rb files in that directory are automatically loaded.

    # Optional settings, left disabled:
    #
    #   Custom directories with classes and modules you want to be autoloadable.
    #     config.autoload_paths += %W(#{config.root}/extras)
    #
    #   Only load the plugins named here, in the order given (default is alphabetical).
    #   :all can be used as a placeholder for all plugins not explicitly named.
    #     config.plugins = [ :exception_notification, :ssl_requirement, :all ]
    #
    #   Activate observers that should always be running.
    #     config.active_record.observers = :cacher, :garbage_collector, :forum_observer
    #
    #   Set Time.zone default to the specified zone and make Active Record auto-convert to this zone.
    #   Run "rake -D time" for a list of tasks for finding time zone names. Default is UTC.
    #     config.time_zone = 'Central Time (US & Canada)'
    #
    #   The default locale is :en and all translations from config/locales/*.rb,yml are auto loaded.
    #     config.i18n.load_path += Dir[Rails.root.join('my', 'locales', '*.{rb,yml}').to_s]
    #     config.i18n.default_locale = :de

    # Default template encoding (Ruby 1.9).
    config.encoding = "utf-8"

    # Keep sensitive parameters out of the log file.
    config.filter_parameters += [:password]

    # Asset pipeline: enabled; bump the version to expire all assets.
    config.assets.enabled = true
    config.assets.version = '1.0'
  end
end
|
50
|
+
|