crawler_detect 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module CrawlerDetect
  module Library
    # Regular-expression fragments describing user-agent tokens that belong to
    # ordinary browsers, platforms and devices (for example "Safari/537.36" or
    # "Windows NT 10.0").
    #
    # NOTE(review): presumably the detector strips these fragments from the
    # user agent before matching crawler patterns - confirm against
    # CrawlerDetect::Detector, which is not visible from here.
    module Exclusions
      # One pattern per line, kept in a raw (single-quoted) squiggly heredoc so
      # that backslashes reach the regexp engine untouched and the list can be
      # indented with the surrounding code.
      #
      # Every line is stripped individually and blank lines are dropped, so
      # source indentation can never leak into a pattern. Both the array and
      # each pattern string are frozen because the constant is shared.
      EXCLUSIONS = <<~'PATTERNS'.lines.map(&:strip).reject(&:empty?).each(&:freeze).freeze
        Safari.[\d\.]*
        Firefox.[\d\.]*
        Chrome.[\d\.]*
        Chromium.[\d\.]*
        MSIE.[\d\.]
        Opera\/[\d\.]*
        Mozilla.[\d\.]*
        AppleWebKit.[\d\.]*
        Trident.[\d\.]*
        Windows NT.[\d\.]*
        Android [\d\.]*
        Macintosh.
        Ubuntu
        Linux
        [ ]Intel
        Mac OS X [\d_]*
        (like )?Gecko(.[\d\.]*)?
        KHTML,
        CriOS.[\d\.]*
        CPU iPhone OS ([0-9_])* like Mac OS X
        CPU OS ([0-9_])* like Mac OS X
        iPod
        compatible
        x86_..
        i686
        x64
        X11
        rv:[\d\.]*
        Version.[\d\.]*
        WOW64
        Win64
        Dalvik.[\d\.]*
        \.NET CLR [\d\.]*
        Presto.[\d\.]*
        Media Center PC
        BlackBerry
        Build
        Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.
        Opera
        \.NET[\d\.]*
        cubot
        ; M bot
        ; B bot
        ; IDbot
        ; ID bot
        ; POWER BOT
        ;
      PATTERNS
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module CrawlerDetect
  module Library
    # Names of the Rack/CGI environment keys that may carry a user-agent
    # string (or another value that identifies the client).
    module Headers
      # The order of the entries is significant to callers that concatenate
      # the header values in sequence.
      HEADERS = [
        "HTTP_USER_AGENT",            # The default User-Agent string.
        "HTTP_X_OPERAMINI_PHONE_UA",  # Can occur on devices using Opera Mini.
        # Vodafone specific header:
        # http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
        "HTTP_X_DEVICE_USER_AGENT",
        "HTTP_X_ORIGINAL_USER_AGENT",
        "HTTP_X_SKYFIRE_PHONE",
        "HTTP_X_BOLT_PHONE_UA",
        "HTTP_DEVICE_STOCK_UA",
        "HTTP_X_UCBROWSER_DEVICE_UA",
        # Sometimes bots (especially Google) send a genuine user agent but
        # fill this header in with their email address.
        "HTTP_FROM",
        "HTTP_X_SCANNER",             # Seen in use by Netsparker.
      ].freeze
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module CrawlerDetect
  # Release number of the gem, as a "MAJOR.MINOR.PATCH" string.
  VERSION = "0.0.2"
end
@@ -0,0 +1,47 @@
1
+ # frozen_string_literal: true
2
+
3
module Rack
  # Rack middleware that runs crawler detection for every request and stores
  # the outcome in the Rack env under "rack.crawler_detect" as a hash with
  # the keys :is_crawler and :crawler_name.
  #
  # It also adds #is_crawler? and #crawler_name to Rack::Request::Helpers so
  # request objects can read that result.
  #
  # A middleware instance is shared by all requests, so nothing that depends
  # on a single request (the env, the user agent) is kept in instance
  # variables; only the downstream app is stored.
  class CrawlerDetect
    # @param app [#call] the next Rack application in the stack
    # @param options [Hash] accepted for interface compatibility; unused
    def initialize(app, options = {})
      @app = app
      install_request_helpers
    end

    # Detects a crawler from the request headers, records the result in
    # +env+ and passes the request on.
    #
    # @param env [Hash] the Rack environment of the current request
    # @return [Object] whatever the downstream app returns
    def call(env)
      detector = ::CrawlerDetect::Detector.new(user_agent(env))
      env["rack.crawler_detect"] = {
        is_crawler: detector.is_crawler?,
        crawler_name: detector.crawler_name,
      }
      @app.call(env)
    end

    private

    # Defines the reader helpers on Rack::Request::Helpers. The methods are
    # defined directly on the module (rather than mixed in through another
    # module) so request classes that already include Helpers pick them up.
    # Both helpers return nil when the middleware did not process the
    # request, instead of raising NoMethodError on a missing env entry.
    def install_request_helpers
      Rack::Request::Helpers.module_eval do
        def is_crawler?
          (env["rack.crawler_detect"] || {})[:is_crawler]
        end

        def crawler_name
          (env["rack.crawler_detect"] || {})[:crawler_name]
        end
      end
    end

    # Joins the values of all known user-agent headers present in +env+ with
    # single spaces. Computed per request; always returns a String (empty
    # when none of the headers is set).
    def user_agent(env)
      env.values_at(*user_agent_headers).compact.join(" ")
    end

    # Env keys to inspect, as provided by the library's "headers" list.
    def user_agent_headers
      ::CrawlerDetect::Library.get_array("headers")
    end
  end
end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawler_detect
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Pavel Kozlov
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-08-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fuubar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: parallel_tests
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry-meta
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.0.10
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.0.10
97
+ - !ruby/object:Gem::Dependency
98
+ name: rack-test
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.1'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.1'
111
+ description: CrawlerDetect is a library to detect bots/crawlers via the user agent
112
+ email:
113
+ - loadkpi@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".rspec"
120
+ - ".rubocop.yml"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - crawler_detect.gemspec
127
+ - lib/crawler_detect.rb
128
+ - lib/crawler_detect/detector.rb
129
+ - lib/crawler_detect/library.rb
130
+ - lib/crawler_detect/library/crawlers.rb
131
+ - lib/crawler_detect/library/exclusions.rb
132
+ - lib/crawler_detect/library/headers.rb
133
+ - lib/crawler_detect/version.rb
134
+ - lib/rack/crawler_detect.rb
135
+ homepage: https://github.com/loadkpi/crawler_detect
136
+ licenses:
137
+ - MIT
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.7.7
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: 'CrawlerDetect: detect bots/crawlers'
159
+ test_files: []