rack_detect_robots 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/rack_detect_robots.rb +60 -0
  2. metadata +146 -0
@@ -0,0 +1,60 @@
1
module Rack
  # Outcome of a robot check for a single request: whether the request was
  # classified as coming from a robot and, if so, the matched name.
  class DetectRobotsResult
    # The matched robot name, or nil when no robot was detected.
    attr_reader :robot_name

    # Builds the result used for requests that are not classified as robots.
    #
    # @return [DetectRobotsResult] a result whose robot? is false
    def self.no_robot
      new(nil)
    end

    # @param robot_name [String, nil] text that identified the robot;
    #   nil (or false) means "not a robot"
    def initialize(robot_name)
      @robot_name = robot_name
      @is_robot = !!robot_name # no name, no robot
    end

    # @return [Boolean] true when a robot name was supplied
    def robot?
      @is_robot
    end
  end

  # Rack middleware that checks the request's User-Agent header against a
  # crawler pattern and stores a DetectRobotsResult in the env under the
  # :rack_detect_robots key before calling the wrapped application.
  class DetectRobots
    # Crawler names matched case-insensitively as substrings of the
    # User-Agent. A single alternation regexp is fast for up to roughly 200
    # crawlers; beyond that use something different (a trie, ...).
    #
    # The list is read every time a middleware instance is built without an
    # explicit pattern, so it is deliberately left unfrozen: entries appended
    # before construction are picked up.
    # NOTE(review): 'gigaboti' may be a typo for 'gigabot' - confirm before
    # changing, the entry is kept exactly as it was.
    KNOWN_CRAWLERS = [
      'yahoo! slurp',
      'yahoo! de slurp',
      'googlebot',
      'ask jeeves',
      'yanga worldsearch bot',
      'gigaboti',
      'ichiro',
      'msnbot',
      'crawler',
      'ia_archiver',
      'jobverifier',
      'twiceler',
      'eurobot',
      'adsbot-google',
      'speedy spider',
      'yacybot',
      'wget',
      'findlinks',
      'feedhub metadatafetcher',
      'jobrobot.de',
      'baiduspider'
    ]

    # @param app [#call] the downstream Rack application
    # @param crawler_regexp [Regexp, String, Array, nil] pattern used to spot
    #   robots. nil/false falls back to KNOWN_CRAWLERS. A String or an Array
    #   of names is turned into a case-insensitive alternation of the escaped
    #   names. Anything else (typically a Regexp) is used as given and only
    #   needs to respond to match(user_agent).
    def initialize(app, crawler_regexp = nil)
      @app = app
      @crawler_regexp =
        case crawler_regexp
        when nil, false then names_to_regexp(KNOWN_CRAWLERS)
        when String     then names_to_regexp([crawler_regexp])
        when Array      then names_to_regexp(crawler_regexp)
        else crawler_regexp
        end
    end

    # Classifies the request, records the result in the env and delegates to
    # the wrapped application.
    #
    # @param env [Hash] the Rack environment
    # @return [Object] whatever the wrapped application returns
    def call(env)
      # NOTE(review): the key is a Symbol, while Rack's SPEC describes env
      # keys as Strings - confirm before changing; it is kept because
      # consumers read env[:rack_detect_robots].
      env[:rack_detect_robots] = test_for_robots(env)
      @app.call(env)
    end

    private

    # Compiles a list of literal crawler names into one case-insensitive
    # regexp. An empty list yields a pattern that never matches; an empty
    # source would otherwise match every user agent.
    def names_to_regexp(names)
      return /(?!)/ if names.empty?

      alternatives = names.map { |bot| Regexp.escape(bot.to_s) }
      Regexp.new(alternatives.join('|'), Regexp::IGNORECASE)
    end

    # Builds the result for the env's HTTP_USER_AGENT value. A missing or
    # empty header is treated as "no robot"; otherwise the robot name is the
    # exact text the pattern matched.
    def test_for_robots(env)
      user_agent = env["HTTP_USER_AGENT"]
      return DetectRobotsResult.no_robot if user_agent.nil? || user_agent.empty?

      match = @crawler_regexp.match(user_agent)
      return DetectRobotsResult.no_robot unless match

      DetectRobotsResult.new(match[0])
    end
  end
end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack_detect_robots
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Schrammel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-05 00:00:00.000000000 +02:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ requirement: &75639470 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *75639470
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake
28
+ requirement: &75638730 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *75638730
37
+ - !ruby/object:Gem::Dependency
38
+ name: rake
39
+ requirement: &75637980 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *75637980
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ requirement: &75637530 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: '1.6'
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *75637530
59
+ - !ruby/object:Gem::Dependency
60
+ name: rspec
61
+ requirement: &75637010 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: '2.6'
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *75637010
70
+ - !ruby/object:Gem::Dependency
71
+ name: rake
72
+ requirement: &75636540 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: *75636540
81
+ - !ruby/object:Gem::Dependency
82
+ name: jeweler
83
+ requirement: &75636050 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ~>
87
+ - !ruby/object:Gem::Version
88
+ version: '1.6'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: *75636050
92
+ - !ruby/object:Gem::Dependency
93
+ name: rspec
94
+ requirement: &75635640 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: '2.6'
100
+ type: :development
101
+ prerelease: false
102
+ version_requirements: *75635640
103
+ - !ruby/object:Gem::Dependency
104
+ name: rack
105
+ requirement: &75635220 !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.9.1
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: *75635220
114
+ description:
115
+ email:
116
+ executables: []
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - lib/rack_detect_robots.rb
121
+ has_rdoc: true
122
+ homepage: http://github.com/experteer/rack_detect_robots
123
+ licenses: []
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ requirements: []
141
+ rubyforge_project:
142
+ rubygems_version: 1.6.2
143
+ signing_key:
144
+ specification_version: 3
145
+ summary: Rack Middleware for detecting robots
146
+ test_files: []