crawler_detect 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
# frozen_string_literal: true

module CrawlerDetect
  module Library
    # Regular-expression fragments for the generic parts of a User-Agent string
    # (browser names and versions, platform and architecture tokens, ...).
    #
    # NOTE(review): presumably these fragments are removed from the user agent
    # before it is matched against the crawler list; confirm against the
    # detector, which is not part of this file.
    module Exclusions
      # One fragment per element, in matching-priority order.
      #
      # The list is spelled out as individual single-quoted literals rather
      # than being split out of one multi-line string: splitting keeps whatever
      # indentation the source lines carry, which would silently turn leading
      # whitespace into part of every pattern. Single quotes keep the regexp
      # backslashes (`\d`, `\.`, `\/`) verbatim.
      EXCLUSIONS = [
        'Safari.[\d\.]*',
        'Firefox.[\d\.]*',
        'Chrome.[\d\.]*',
        'Chromium.[\d\.]*',
        # NOTE(review): unlike its neighbours this fragment has no trailing `*`,
        # so it consumes a single version character. Kept as-is; confirm intent.
        'MSIE.[\d\.]',
        'Opera\/[\d\.]*',
        'Mozilla.[\d\.]*',
        'AppleWebKit.[\d\.]*',
        'Trident.[\d\.]*',
        'Windows NT.[\d\.]*',
        'Android [\d\.]*',
        'Macintosh.',
        'Ubuntu',
        'Linux',
        # The character class makes the required leading space explicit.
        '[ ]Intel',
        'Mac OS X [\d_]*',
        '(like )?Gecko(.[\d\.]*)?',
        'KHTML,',
        'CriOS.[\d\.]*',
        'CPU iPhone OS ([0-9_])* like Mac OS X',
        'CPU OS ([0-9_])* like Mac OS X',
        'iPod',
        'compatible',
        'x86_..',
        'i686',
        'x64',
        'X11',
        'rv:[\d\.]*',
        'Version.[\d\.]*',
        'WOW64',
        'Win64',
        'Dalvik.[\d\.]*',
        '\.NET CLR [\d\.]*',
        'Presto.[\d\.]*',
        'Media Center PC',
        'BlackBerry',
        'Build',
        'Opera Mini\/\d{1,2}\.\d{1,2}\.[\d\.]*\/\d{1,2}\.',
        'Opera',
        '\.NET[\d\.]*',
        'cubot',
        '; M bot',
        '; B bot',
        '; IDbot',
        '; ID bot',
        '; POWER BOT',
        ';',
      ].freeze
    end
  end
end
@@ -0,0 +1,25 @@
# frozen_string_literal: true

module CrawlerDetect
  module Library
    # Names of the request-environment keys (CGI-style `HTTP_*` form) that are
    # consulted when looking for a User-Agent string.
    module Headers
      # Ordered, frozen list of header keys.
      HEADERS = [
        "HTTP_USER_AGENT",            # The default User-Agent string.
        "HTTP_X_OPERAMINI_PHONE_UA",  # Header can occur on devices using Opera Mini.
        # Vodafone specific header:
        # http://www.seoprinciple.com/mobile-web-community-still-angry-at-vodafone/24/
        "HTTP_X_DEVICE_USER_AGENT",
        "HTTP_X_ORIGINAL_USER_AGENT",
        "HTTP_X_SKYFIRE_PHONE",
        "HTTP_X_BOLT_PHONE_UA",
        "HTTP_DEVICE_STOCK_UA",
        "HTTP_X_UCBROWSER_DEVICE_UA",
        # Sometimes, bots (especially Google) use a genuine user agent, but
        # fill this header in with their email address.
        "HTTP_FROM",
        "HTTP_X_SCANNER",             # Seen in use by Netsparker.
      ].freeze
    end
  end
end
@@ -0,0 +1,5 @@
# frozen_string_literal: true

# Top-level namespace of the crawler_detect gem.
module CrawlerDetect
  # Version string of this gem release.
  VERSION = "0.0.2"
end
@@ -0,0 +1,47 @@
# frozen_string_literal: true

module Rack
  # Rack middleware that runs ::CrawlerDetect::Detector over the request's
  # User-Agent-bearing headers and stores the outcome in
  # env["rack.crawler_detect"] as { is_crawler: ..., crawler_name: ... }.
  #
  # Instantiating the middleware also adds #is_crawler? and #crawler_name to
  # Rack::Request::Helpers so request objects can read that outcome.
  #
  # A single middleware instance serves every request, so nothing that belongs
  # to one request is kept in instance variables: the env and the derived user
  # agent are passed around as locals. (Memoizing the user agent on the
  # instance would make every later request reuse the first request's value.)
  class CrawlerDetect
    # @param app [#call] the next Rack application in the stack
    # @param options [Hash] accepted for interface compatibility; currently unused
    def initialize(app, options = {})
      Rack::Request::Helpers.module_eval do
        # @return [Object, nil] the detector's verdict for this request, or nil
        #   when the middleware has not populated the env
        def is_crawler?
          verdict = env["rack.crawler_detect"]
          verdict && verdict[:is_crawler]
        end

        # @return [Object, nil] the detected crawler name, or nil when there is
        #   none or the middleware has not populated the env
        def crawler_name
          verdict = env["rack.crawler_detect"]
          verdict && verdict[:crawler_name]
        end
      end
      @app = app
    end

    # Annotates +env+ with the detection result, then calls the wrapped app.
    #
    # @param env [Hash] the Rack environment of the current request
    # @return [Object] whatever the wrapped application returns
    def call(env)
      env["rack.crawler_detect"] = detect(user_agent_from(env))
      @app.call(env)
    end

    private

    # Runs the detector on +user_agent+ and packs its answers into a hash.
    def detect(user_agent)
      detector = ::CrawlerDetect::Detector.new(user_agent)
      {
        is_crawler: detector.is_crawler?,
        crawler_name: detector.crawler_name,
      }
    end

    # Joins the values of every configured header present in +env+ with single
    # spaces. Always returns a String; it is empty when none of the headers is
    # set, in which case the detector is still consulted.
    def user_agent_from(env)
      env.values_at(*user_agent_headers).compact.join(" ")
    end

    # Header keys to inspect, as provided by the library.
    def user_agent_headers
      ::CrawlerDetect::Library.get_array("headers")
    end
  end
end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: crawler_detect
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Pavel Kozlov
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-08-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.15'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.15'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: fuubar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: parallel_tests
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '2.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '2.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry-meta
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.0.10
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.0.10
97
+ - !ruby/object:Gem::Dependency
98
+ name: rack-test
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '1.1'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.1'
111
+ description: CrawlerDetect is a library to detect bots/crawlers via the user agent
112
+ email:
113
+ - loadkpi@gmail.com
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".gitignore"
119
+ - ".rspec"
120
+ - ".rubocop.yml"
121
+ - ".travis.yml"
122
+ - Gemfile
123
+ - LICENSE.txt
124
+ - README.md
125
+ - Rakefile
126
+ - crawler_detect.gemspec
127
+ - lib/crawler_detect.rb
128
+ - lib/crawler_detect/detector.rb
129
+ - lib/crawler_detect/library.rb
130
+ - lib/crawler_detect/library/crawlers.rb
131
+ - lib/crawler_detect/library/exclusions.rb
132
+ - lib/crawler_detect/library/headers.rb
133
+ - lib/crawler_detect/version.rb
134
+ - lib/rack/crawler_detect.rb
135
+ homepage: https://github.com/loadkpi/crawler_detect
136
+ licenses:
137
+ - MIT
138
+ metadata: {}
139
+ post_install_message:
140
+ rdoc_options: []
141
+ require_paths:
142
+ - lib
143
+ required_ruby_version: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ">="
146
+ - !ruby/object:Gem::Version
147
+ version: '0'
148
+ required_rubygems_version: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ requirements: []
154
+ rubyforge_project:
155
+ rubygems_version: 2.7.7
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: 'CrawlerDetect: detect bots/crawlers'
159
+ test_files: []