crawler_detect 0.1.12 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6e03716868421dc64e5acb715b35599776fbe3951bbeb7e462892ed3a6a51d6
4
- data.tar.gz: 10b00a68949482af5cb6064f538e0829f2bcb836fe5605da48b48f0af7bbad71
3
+ metadata.gz: 6535ee2f876e0b1e05444bf0bb4a7a9082fd70ba4a454d7b1e2e9737b4b84d41
4
+ data.tar.gz: ff05c16b5cd08416dfded4a8ca3c83ee73d2599dc2641aa8d212a7d3d72f3bd0
5
5
  SHA512:
6
- metadata.gz: cb66a7c310ca791038ca199c68626d522f99cb4ddff57382b07ecb91b824e44292362351ccb71622051ece1d8c7a900019627fed731e46f5474507b8b20f6aaa
7
- data.tar.gz: 3f4c2484f4155d975915dd21e1c62d6f8dd19ddda4023cff9530701cf628ddf94c10f5e097c3cb85ffbf3d71686bd91d9b9ac71da59cf47933549fd847d2c350
6
+ metadata.gz: 384c92b53a77f3b8280259897060f7fc74e6d0ac214d6469cfdd87a84c290286ae9bacace2875c2c033d69a77eb863cb7eee5053552d0c619b85f1f15ba9a79d
7
+ data.tar.gz: 3904d9beef1ff2211881408d7ab2c60caf44c2103d07f8188a5c8d9454e58d274f1779ca4d0ad0ca53a548631062dd9f4f97c32c0ee9e811191b17c117b59db5
@@ -0,0 +1,38 @@
1
+ name: build
2
+
3
+ on: [push]
4
+
5
+ jobs:
6
+ lint:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v2
10
+ - uses: ruby/setup-ruby@v1
11
+ with:
12
+ ruby-version: 2.7
13
+ - name: install gems
14
+ run: |
15
+ gem install bundler
16
+ bundle install --jobs 4 --retry 3
17
+ - run: bundle exec rubocop
18
+ test:
19
+ needs: lint
20
+ runs-on: ubuntu-latest
21
+ strategy:
22
+ fail-fast: false
23
+ matrix:
24
+ ruby: [2.5, 2.6, 2.7]
25
+ test-group: [1, 2, 3, 4]
26
+ name: Ruby ${{ matrix.ruby }}, test-group ${{ matrix.test-group }}
27
+ steps:
28
+ - uses: actions/checkout@v2
29
+ - uses: ruby/setup-ruby@v1
30
+ with:
31
+ ruby-version: ${{ matrix.ruby }}
32
+ - name: install gems
33
+ run: |
34
+ gem install bundler
35
+ bundle install --jobs 4 --retry 3
36
+ - name: test
37
+ run: |
38
+ bundle exec parallel_rspec spec/ -n 4 --only-group ${{ matrix.test-group }} --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
@@ -1,174 +1,19 @@
1
- AllCops:
2
- TargetRubyVersion: 2.2
3
- # RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
4
- # to ignore them, so only the ones explicitly set in this file are enabled.
5
- DisabledByDefault: true
6
- Exclude:
7
- - '**/templates/**/*'
8
- - '**/vendor/**/*'
9
- - '**/vendor/**/.*'
10
- - '**/node_modules/**/*'
11
- - 'actionpack/lib/action_dispatch/journey/parser.rb'
1
+ inherit_gem:
2
+ armitage-rubocop:
3
+ - lib/rubocop.general.yml
4
+ - lib/rubocop.rake.yml
5
+ - lib/rubocop.rspec.yml
12
6
 
13
- # Prefer assert_not_x over refute_x
14
- CustomCops/RefuteNot:
7
+ AllCops:
8
+ TargetRubyVersion: 2.7.1
15
9
  Include:
16
- - '**/test/**/*'
17
-
18
- # Prefer &&/|| over and/or.
19
- Style/AndOr:
20
- Enabled: true
21
-
22
- # Do not use braces for hash literals when they are the last argument of a
23
- # method call.
24
- Style/BracesAroundHashParameters:
25
- Enabled: true
26
- EnforcedStyle: context_dependent
27
-
28
- # Align `when` with `case`.
29
- Layout/CaseIndentation:
30
- Enabled: true
31
-
32
- # Align comments with method definitions.
33
- Layout/CommentIndentation:
34
- Enabled: true
35
-
36
- Layout/ElseAlignment:
37
- Enabled: true
38
-
39
- # Align `end` with the matching keyword or starting expression except for
40
- # assignments, where it should be aligned with the LHS.
41
- Layout/EndAlignment:
42
- Enabled: true
43
- EnforcedStyleAlignWith: variable
44
- AutoCorrect: true
45
-
46
- Layout/EmptyLineAfterMagicComment:
47
- Enabled: true
48
-
49
- # In a regular class definition, no empty lines around the body.
50
- Layout/EmptyLinesAroundClassBody:
51
- Enabled: true
52
-
53
- # In a regular method definition, no empty lines around the body.
54
- Layout/EmptyLinesAroundMethodBody:
55
- Enabled: true
10
+ - lib/**/*.rb
11
+ - spec/**/*.rb
12
+ - Gemfile
13
+ - Rakefile
14
+ - crawler_detect.gemspec
15
+ - bin/console
56
16
 
57
- # In a regular module definition, no empty lines around the body.
58
- Layout/EmptyLinesAroundModuleBody:
59
- Enabled: true
60
-
61
- Layout/FirstParameterIndentation:
62
- Enabled: true
63
-
64
- # Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
65
- Style/HashSyntax:
66
- Enabled: true
67
-
68
- # Method definitions after `private` or `protected` isolated calls need one
69
- # extra level of indentation.
70
- Layout/IndentationConsistency:
71
- Enabled: true
72
- EnforcedStyle: rails
73
-
74
- # Two spaces, no tabs (for indentation).
75
- Layout/IndentationWidth:
76
- Enabled: true
77
-
78
- Layout/LeadingCommentSpace:
79
- Enabled: true
80
-
81
- Layout/SpaceAfterColon:
82
- Enabled: true
83
-
84
- Layout/SpaceAfterComma:
85
- Enabled: true
86
-
87
- Layout/SpaceAroundEqualsInParameterDefault:
88
- Enabled: true
89
-
90
- Layout/SpaceAroundKeyword:
91
- Enabled: true
92
-
93
- Layout/SpaceAroundOperators:
94
- Enabled: true
95
-
96
- Layout/SpaceBeforeComma:
97
- Enabled: true
98
-
99
- Layout/SpaceBeforeFirstArg:
100
- Enabled: true
101
-
102
- Style/DefWithParentheses:
103
- Enabled: true
104
-
105
- # Defining a method with parameters needs parentheses.
106
- Style/MethodDefParentheses:
107
- Enabled: true
108
-
109
- Style/FrozenStringLiteralComment:
110
- Enabled: true
111
- EnforcedStyle: always
112
- Exclude:
113
- - 'actionview/test/**/*.builder'
114
- - 'actionview/test/**/*.ruby'
115
- - 'actionpack/test/**/*.builder'
116
- - 'actionpack/test/**/*.ruby'
117
- - 'activestorage/db/migrate/**/*.rb'
118
- - 'db/migrate/**/*.rb'
119
- - 'db/*.rb'
120
-
121
- # Use `foo {}` not `foo{}`.
122
- Layout/SpaceBeforeBlockBraces:
123
- Enabled: true
124
-
125
- # Use `foo { bar }` not `foo {bar}`.
126
- Layout/SpaceInsideBlockBraces:
127
- Enabled: true
128
-
129
- # Use `{ a: 1 }` not `{a:1}`.
130
- Layout/SpaceInsideHashLiteralBraces:
131
- Enabled: true
132
-
133
- Layout/SpaceInsideParens:
134
- Enabled: true
135
-
136
- # Check quotes usage according to lint rule below.
137
17
  Style/StringLiterals:
138
18
  Enabled: true
139
19
  EnforcedStyle: double_quotes
140
-
141
- # Detect hard tabs, no hard tabs.
142
- Layout/Tab:
143
- Enabled: true
144
-
145
- # Blank lines should not have any spaces.
146
- Layout/TrailingBlankLines:
147
- Enabled: true
148
-
149
- # No trailing whitespace.
150
- Layout/TrailingWhitespace:
151
- Enabled: true
152
-
153
- # Use quotes for string literals when they are enough.
154
- Style/UnneededPercentQ:
155
- Enabled: true
156
-
157
- # Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
158
- Lint/RequireParentheses:
159
- Enabled: true
160
-
161
- Lint/StringConversionInInterpolation:
162
- Enabled: true
163
-
164
- Style/RedundantReturn:
165
- Enabled: true
166
- AllowMultipleReturnValues: true
167
-
168
- Style/Semicolon:
169
- Enabled: true
170
- AllowAsExpressionSeparator: true
171
-
172
- # Prefer Foo.method over Foo::method
173
- Style/ColonMethodCall:
174
- Enabled: true
@@ -0,0 +1,36 @@
1
+ # CrawlerDetect major changes
2
+
3
+ This changelog **does not contain** raw data updates
4
+ but only major changes.
5
+
6
+ 1.1.0
7
+ ---------
8
+ - Moves to Ruby's Stdlib JSON implementation to reduce dependencies
9
+
10
+ 1.0.0
11
+ ---------
12
+ - Use raw JSON files instead of copying them to rb [#8]
13
+ - Add CrawlerDetect::Config to make it possible to have own raw files [#8]
14
+ - Add bin/update_raw_files to update raw files from PHP lib [#8]
15
+ - Add Changelog
16
+
17
+ 0.1.11
18
+ ---------
19
+ - Add thread safety [#19]
20
+
21
+ 0.1.6
22
+ ---------
23
+ - Strip crawler name [#10]
24
+
25
+ 0.1.2
26
+ ---------
27
+ - Add parallel tests [#2]
28
+
29
+ 0.1.1
30
+ ---------
31
+ - Fix: rack request
32
+
33
+ 0.1.0
34
+ ---------
35
+ - init
36
+
@@ -1,16 +1,25 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- crawler_detect (0.1.11)
4
+ crawler_detect (1.1.0)
5
+ qonfig (~> 0.24)
5
6
 
6
7
  GEM
7
8
  remote: https://rubygems.org/
8
9
  specs:
9
- activesupport (5.2.4.2)
10
+ activesupport (6.0.3.1)
10
11
  concurrent-ruby (~> 1.0, >= 1.0.2)
11
12
  i18n (>= 0.7, < 2)
12
13
  minitest (~> 5.1)
13
14
  tzinfo (~> 1.1)
15
+ zeitwerk (~> 2.2, >= 2.2.2)
16
+ armitage-rubocop (0.82.0)
17
+ rubocop (= 0.82.0)
18
+ rubocop-performance (= 1.5.2)
19
+ rubocop-rails (= 2.5.2)
20
+ rubocop-rake (= 0.5.1)
21
+ rubocop-rspec (= 1.38.1)
22
+ ast (2.4.0)
14
23
  awesome_print (1.8.0)
15
24
  byebug (11.1.2)
16
25
  coderay (1.1.2)
@@ -21,11 +30,14 @@ GEM
21
30
  ruby-progressbar (~> 1.4)
22
31
  i18n (1.8.2)
23
32
  concurrent-ruby (~> 1.0)
33
+ jaro_winkler (1.5.4)
24
34
  method_source (1.0.0)
25
- minitest (5.14.0)
35
+ minitest (5.14.1)
26
36
  parallel (1.19.1)
27
37
  parallel_tests (2.32.0)
28
38
  parallel
39
+ parser (2.7.1.2)
40
+ ast (~> 2.4.0)
29
41
  pry (0.13.1)
30
42
  coderay (~> 1.1)
31
43
  method_source (~> 1.0)
@@ -40,10 +52,13 @@ GEM
40
52
  pry-remote (0.1.8)
41
53
  pry (~> 0.9)
42
54
  slop (~> 3.0)
43
- rack (2.2.2)
55
+ qonfig (0.24.1)
56
+ rack (2.2.3)
44
57
  rack-test (1.1.0)
45
58
  rack (>= 1.0, < 3)
59
+ rainbow (3.0.0)
46
60
  rake (13.0.1)
61
+ rexml (3.2.4)
47
62
  rspec (3.9.0)
48
63
  rspec-core (~> 3.9.0)
49
64
  rspec-expectations (~> 3.9.0)
@@ -57,17 +72,38 @@ GEM
57
72
  diff-lcs (>= 1.2.0, < 2.0)
58
73
  rspec-support (~> 3.9.0)
59
74
  rspec-support (3.9.2)
75
+ rubocop (0.82.0)
76
+ jaro_winkler (~> 1.5.1)
77
+ parallel (~> 1.10)
78
+ parser (>= 2.7.0.1)
79
+ rainbow (>= 2.2.2, < 4.0)
80
+ rexml
81
+ ruby-progressbar (~> 1.7)
82
+ unicode-display_width (>= 1.4.0, < 2.0)
83
+ rubocop-performance (1.5.2)
84
+ rubocop (>= 0.71.0)
85
+ rubocop-rails (2.5.2)
86
+ activesupport
87
+ rack (>= 1.1)
88
+ rubocop (>= 0.72.0)
89
+ rubocop-rake (0.5.1)
90
+ rubocop
91
+ rubocop-rspec (1.38.1)
92
+ rubocop (>= 0.68.1)
60
93
  ruby-progressbar (1.10.1)
61
94
  slop (3.6.0)
62
95
  thread_safe (0.3.6)
63
96
  tzinfo (1.2.7)
64
97
  thread_safe (~> 0.1)
98
+ unicode-display_width (1.7.0)
99
+ zeitwerk (2.3.0)
65
100
 
66
101
  PLATFORMS
67
102
  ruby
68
103
 
69
104
  DEPENDENCIES
70
- activesupport (~> 5.2.0)
105
+ activesupport (~> 6.0.3)
106
+ armitage-rubocop (= 0.82)
71
107
  bundler (>= 1.15)
72
108
  crawler_detect!
73
109
  fuubar (~> 2.0)
@@ -78,4 +114,4 @@ DEPENDENCIES
78
114
  rspec (~> 3.0)
79
115
 
80
116
  BUNDLED WITH
81
- 2.1.3
117
+ 2.1.4
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # CrawlerDetect
2
2
 
3
- [![Build Status](https://travis-ci.org/loadkpi/crawler_detect.svg?branch=master)](https://travis-ci.org/loadkpi/crawler_detect) [![Gem Version](https://badge.fury.io/rb/crawler_detect.svg)](https://badge.fury.io/rb/crawler_detect)
3
+ ![Build](https://github.com/loadkpi/crawler_detect/workflows/build/badge.svg?branch=master) [![Gem Version](https://badge.fury.io/rb/crawler_detect.svg)](https://badge.fury.io/rb/crawler_detect)
4
4
 
5
5
  ## About
6
6
  **CrawlerDetect** is a Ruby version of PHP class @[CrawlerDetect](https://github.com/JayBizzle/Crawler-Detect).
@@ -15,44 +15,59 @@ Comparing with other popular bot-detection gems:
15
15
  | Number of checked HTTP-headers | 10 | 1 | 1 |
16
16
  | Number of updates of bot-list *(1st half of 2018)* | 14 | 1 | 7 |
17
17
 
18
+ In order to remain up-to-date, this gem does not accept any crawler data updates – any PRs to edit the crawler data should be offered to the original [JayBizzle/CrawlerDetect](https://github.com/JayBizzle/Crawler-Detect) project.
19
+
18
20
  ## Installation
19
21
  Add this line to your application's Gemfile:
20
22
 
21
23
  `gem 'crawler_detect'`
22
24
  ## Basic Usage
23
- ```
25
+ ```ruby
24
26
  CrawlerDetect.is_crawler?("Bot user agent")
25
27
  => true
26
28
  ```
27
29
  Or if you need crawler name:
28
- ```
30
+ ```ruby
29
31
  detector = CrawlerDetect.new("Googlebot/2.1 (http://www.google.com/bot.html)")
30
32
  detector.is_crawler?
31
- => true
33
+ # => true
32
34
  detector.crawler_name
33
- => "Googlebot"
35
+ # => "Googlebot"
34
36
  ```
35
37
  ## Rack::Request extension
36
38
  **Optionally** you can add additional methods for `request`:
37
- ```
39
+ ```ruby
38
40
  request.is_crawler?
39
- => false
41
+ # => false
40
42
  request.crawler_name
41
- => nil
43
+ # => nil
42
44
  ```
43
45
  It's more flexible to use `request.is_crawler?` rather than `CrawlerDetect.is_crawler?` because it automatically checks 10 HTTP-headers, not only `HTTP_USER_AGENT`.
44
46
 
45
47
  The only thing you have to do is configure the `Rack::CrawlerDetect` middleware:
46
48
  ### Rails
47
- ```
49
+ ```ruby
48
50
  class Application < Rails::Application
49
- ...
51
+ # ...
50
52
  config.middleware.use Rack::CrawlerDetect
51
53
  end
52
54
  ```
53
- ### Rake
54
- ```
55
+ ### Rack
56
+ ```ruby
55
57
  use Rack::CrawlerDetect
56
58
  ```
59
+ ## Configuration
60
+ In some cases you may want to use your own white-list, black-list, or list of HTTP headers to detect the user agent.
61
+
62
+ This can be done via `CrawlerDetect::Config`. For example, you may have an initializer like this:
63
+ ```ruby
64
+ CrawlerDetect.setup! do |config|
65
+ config.raw_headers_path = File.expand_path("crawlers/MyHeaders.json", __dir__)
66
+ config.raw_crawlers_path = File.expand_path("crawlers/MyCrawlers.json", __dir__)
67
+ config.raw_exclusions_path = File.expand_path("crawlers/MyExclusions.json", __dir__)
68
+ end
69
+ ```
70
+ Make sure that your files are valid JSON files.
71
+ Look at [the raw files](https://github.com/loadkpi/crawler_detect/tree/master/lib/crawler_detect/library/raw) which are used by default for more information.
57
72
  ## License
58
73
  MIT License