rack_detect_robots 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. data/lib/rack_detect_robots.rb +60 -0
  2. metadata +146 -0
@@ -0,0 +1,60 @@
1
module Rack
  # Outcome of a robot check for a single request.
  #
  # A result either carries the name of the crawler that was matched in the
  # User-Agent header, or carries no name at all, meaning "not a robot".
  class DetectRobotsResult
    # @return [String, nil] the matched crawler text, or nil when no robot
    #   was detected
    attr_reader :robot_name

    # Builds the result used when the request did not match any crawler.
    #
    # @return [DetectRobotsResult] a result whose {#robot?} is false
    def self.no_robot
      new(nil)
    end

    # @param robot_name [String, nil] matched crawler text; a nil (or false)
    #   name marks the result as "not a robot"
    def initialize(robot_name)
      @robot_name = robot_name
      @is_robot = !!robot_name # no name, no robot
    end

    # @return [Boolean] true when a crawler name was supplied
    def robot?
      @is_robot
    end
  end

  # Rack middleware that inspects the User-Agent header of each request and
  # stores a DetectRobotsResult in the env under the Symbol key
  # +:rack_detect_robots+ before calling the wrapped app.
  class DetectRobots
    # Case-insensitive substrings that identify a crawler's User-Agent.
    #
    # A single alternation regexp is fast enough for up to roughly 200
    # entries; beyond that, use a different structure (e.g. a trie).
    KNOWN_CRAWLERS = [
      'yahoo! slurp',
      'yahoo! de slurp',
      'googlebot',
      'ask jeeves',
      'yanga worldsearch bot',
      'gigabot', # was 'gigaboti', which never matched "Gigabot/x.y" agents
      'ichiro',
      'msnbot',
      'crawler',
      'ia_archiver',
      'jobverifier',
      'twiceler',
      'eurobot',
      'adsbot-google',
      'speedy spider',
      'yacybot',
      'wget',
      'findlinks',
      'feedhub metadatafetcher',
      'jobrobot.de',
      'baiduspider'
    ].freeze

    # Regexp used when the caller does not supply one. Built once here rather
    # than on every middleware instantiation; each entry is escaped so that
    # characters such as "." and "!" are matched literally.
    DEFAULT_CRAWLER_REGEXP = Regexp.new(
      KNOWN_CRAWLERS.map { |bot| Regexp.escape(bot) }.join('|'),
      Regexp::IGNORECASE
    ).freeze

    # @param app [#call] the next Rack application in the stack
    # @param crawler_regexp [Regexp, nil] custom pattern to match against the
    #   User-Agent; when nil, the pattern built from KNOWN_CRAWLERS is used
    def initialize(app, crawler_regexp = nil)
      @app = app
      @crawler_regexp = crawler_regexp || DEFAULT_CRAWLER_REGEXP
    end

    # Stores the detection result in the env, then delegates to the wrapped app.
    #
    # NOTE(review): the result is stored under a Symbol key; Rack env keys are
    # conventionally Strings -- kept as-is because downstream readers depend
    # on this exact key.
    #
    # @param env [Hash] the Rack environment
    # @return [Object] whatever the wrapped app returns
    def call(env)
      env[:rack_detect_robots] = test_for_robots(env)
      @app.call(env)
    end

    private

    # Matches the request's User-Agent against the crawler pattern.
    #
    # @param env [Hash] the Rack environment
    # @return [DetectRobotsResult] a robot result carrying the matched text
    #   (as it appears in the header), or a no-robot result when the header
    #   is missing, empty, or does not match
    def test_for_robots(env)
      user_agent = env['HTTP_USER_AGENT']
      return DetectRobotsResult.no_robot if user_agent.nil? || user_agent.empty?

      match = @crawler_regexp.match(user_agent)
      match ? DetectRobotsResult.new(match[0]) : DetectRobotsResult.no_robot
    end
  end
end
metadata ADDED
@@ -0,0 +1,146 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rack_detect_robots
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Peter Schrammel
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-09-05 00:00:00.000000000 +02:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ requirement: &75639470 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ! '>='
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: *75639470
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake
28
+ requirement: &75638730 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: *75638730
37
+ - !ruby/object:Gem::Dependency
38
+ name: rake
39
+ requirement: &75637980 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ type: :development
46
+ prerelease: false
47
+ version_requirements: *75637980
48
+ - !ruby/object:Gem::Dependency
49
+ name: jeweler
50
+ requirement: &75637530 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: '1.6'
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *75637530
59
+ - !ruby/object:Gem::Dependency
60
+ name: rspec
61
+ requirement: &75637010 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: '2.6'
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *75637010
70
+ - !ruby/object:Gem::Dependency
71
+ name: rake
72
+ requirement: &75636540 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: *75636540
81
+ - !ruby/object:Gem::Dependency
82
+ name: jeweler
83
+ requirement: &75636050 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ~>
87
+ - !ruby/object:Gem::Version
88
+ version: '1.6'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: *75636050
92
+ - !ruby/object:Gem::Dependency
93
+ name: rspec
94
+ requirement: &75635640 !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ~>
98
+ - !ruby/object:Gem::Version
99
+ version: '2.6'
100
+ type: :development
101
+ prerelease: false
102
+ version_requirements: *75635640
103
+ - !ruby/object:Gem::Dependency
104
+ name: rack
105
+ requirement: &75635220 !ruby/object:Gem::Requirement
106
+ none: false
107
+ requirements:
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 0.9.1
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: *75635220
114
+ description:
115
+ email:
116
+ executables: []
117
+ extensions: []
118
+ extra_rdoc_files: []
119
+ files:
120
+ - lib/rack_detect_robots.rb
121
+ has_rdoc: true
122
+ homepage: http://github.com/experteer/rack_detect_robots
123
+ licenses: []
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ required_rubygems_version: !ruby/object:Gem::Requirement
135
+ none: false
136
+ requirements:
137
+ - - ! '>='
138
+ - !ruby/object:Gem::Version
139
+ version: '0'
140
+ requirements: []
141
+ rubyforge_project:
142
+ rubygems_version: 1.6.2
143
+ signing_key:
144
+ specification_version: 3
145
+ summary: Rack Middleware for detecting robots
146
+ test_files: []