crawler_detect 0.1.11 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +13 -168
- data/.travis.yml +16 -6
- data/CHANGELOG.md +32 -0
- data/Gemfile.lock +117 -0
- data/README.md +13 -0
- data/bin/update_raw_files +10 -0
- data/crawler_detect.gemspec +6 -3
- data/lib/crawler_detect.rb +29 -7
- data/lib/crawler_detect/config.rb +29 -0
- data/lib/crawler_detect/detector.rb +27 -14
- data/lib/crawler_detect/library.rb +9 -3
- data/lib/crawler_detect/library/crawlers.rb +6 -1253
- data/lib/crawler_detect/library/exclusions.rb +6 -50
- data/lib/crawler_detect/library/headers.rb +6 -17
- data/lib/crawler_detect/library/loader.rb +18 -0
- data/lib/crawler_detect/library/raw/Crawlers.json +1 -0
- data/lib/crawler_detect/library/raw/Exclusions.json +1 -0
- data/lib/crawler_detect/library/raw/Headers.json +1 -0
- data/lib/crawler_detect/version.rb +2 -1
- data/lib/rack/crawler_detect.rb +20 -17
- metadata +41 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6fed2e5d51ebe5a84f9efd591c8ec11eeb7b8ed6d73c06680972482040fbb2e3
|
4
|
+
data.tar.gz: 90c32dbab1d592fcb95641f06f995454dace41148c30ca7006082c57c0eeca79
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13a526e036c48f35e70e6d82a1270d934dc741e125ca106ce290fae1adf8e207e001b3f1d4a240a8778bdc55f45e2d929973fb8e134edeeb7be37dfb62971acd
|
7
|
+
data.tar.gz: af479ed4af6732062abe1ae6678d6a8f3afd38c1ae1231ceef7bd97d8ee5ee3cf0f4cd8aabaf17b61860410a9f3d9f93633c26f1f7084527380bb77ef1e71e86
|
data/.rubocop.yml
CHANGED
@@ -1,174 +1,19 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
Exclude:
|
7
|
-
- '**/templates/**/*'
|
8
|
-
- '**/vendor/**/*'
|
9
|
-
- '**/vendor/**/.*'
|
10
|
-
- '**/node_modules/**/*'
|
11
|
-
- 'actionpack/lib/action_dispatch/journey/parser.rb'
|
1
|
+
inherit_gem:
|
2
|
+
armitage-rubocop:
|
3
|
+
- lib/rubocop.general.yml
|
4
|
+
- lib/rubocop.rake.yml
|
5
|
+
- lib/rubocop.rspec.yml
|
12
6
|
|
13
|
-
|
14
|
-
|
7
|
+
AllCops:
|
8
|
+
TargetRubyVersion: 2.7.1
|
15
9
|
Include:
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# Do not use braces for hash literals when they are the last argument of a
|
23
|
-
# method call.
|
24
|
-
Style/BracesAroundHashParameters:
|
25
|
-
Enabled: true
|
26
|
-
EnforcedStyle: context_dependent
|
27
|
-
|
28
|
-
# Align `when` with `case`.
|
29
|
-
Layout/CaseIndentation:
|
30
|
-
Enabled: true
|
31
|
-
|
32
|
-
# Align comments with method definitions.
|
33
|
-
Layout/CommentIndentation:
|
34
|
-
Enabled: true
|
35
|
-
|
36
|
-
Layout/ElseAlignment:
|
37
|
-
Enabled: true
|
38
|
-
|
39
|
-
# Align `end` with the matching keyword or starting expression except for
|
40
|
-
# assignments, where it should be aligned with the LHS.
|
41
|
-
Layout/EndAlignment:
|
42
|
-
Enabled: true
|
43
|
-
EnforcedStyleAlignWith: variable
|
44
|
-
AutoCorrect: true
|
45
|
-
|
46
|
-
Layout/EmptyLineAfterMagicComment:
|
47
|
-
Enabled: true
|
48
|
-
|
49
|
-
# In a regular class definition, no empty lines around the body.
|
50
|
-
Layout/EmptyLinesAroundClassBody:
|
51
|
-
Enabled: true
|
52
|
-
|
53
|
-
# In a regular method definition, no empty lines around the body.
|
54
|
-
Layout/EmptyLinesAroundMethodBody:
|
55
|
-
Enabled: true
|
10
|
+
- lib/**/*.rb
|
11
|
+
- spec/**/*.rb
|
12
|
+
- Gemfile
|
13
|
+
- Rakefile
|
14
|
+
- crawler_detect.gemspec
|
15
|
+
- bin/console
|
56
16
|
|
57
|
-
# In a regular module definition, no empty lines around the body.
|
58
|
-
Layout/EmptyLinesAroundModuleBody:
|
59
|
-
Enabled: true
|
60
|
-
|
61
|
-
Layout/FirstParameterIndentation:
|
62
|
-
Enabled: true
|
63
|
-
|
64
|
-
# Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
|
65
|
-
Style/HashSyntax:
|
66
|
-
Enabled: true
|
67
|
-
|
68
|
-
# Method definitions after `private` or `protected` isolated calls need one
|
69
|
-
# extra level of indentation.
|
70
|
-
Layout/IndentationConsistency:
|
71
|
-
Enabled: true
|
72
|
-
EnforcedStyle: rails
|
73
|
-
|
74
|
-
# Two spaces, no tabs (for indentation).
|
75
|
-
Layout/IndentationWidth:
|
76
|
-
Enabled: true
|
77
|
-
|
78
|
-
Layout/LeadingCommentSpace:
|
79
|
-
Enabled: true
|
80
|
-
|
81
|
-
Layout/SpaceAfterColon:
|
82
|
-
Enabled: true
|
83
|
-
|
84
|
-
Layout/SpaceAfterComma:
|
85
|
-
Enabled: true
|
86
|
-
|
87
|
-
Layout/SpaceAroundEqualsInParameterDefault:
|
88
|
-
Enabled: true
|
89
|
-
|
90
|
-
Layout/SpaceAroundKeyword:
|
91
|
-
Enabled: true
|
92
|
-
|
93
|
-
Layout/SpaceAroundOperators:
|
94
|
-
Enabled: true
|
95
|
-
|
96
|
-
Layout/SpaceBeforeComma:
|
97
|
-
Enabled: true
|
98
|
-
|
99
|
-
Layout/SpaceBeforeFirstArg:
|
100
|
-
Enabled: true
|
101
|
-
|
102
|
-
Style/DefWithParentheses:
|
103
|
-
Enabled: true
|
104
|
-
|
105
|
-
# Defining a method with parameters needs parentheses.
|
106
|
-
Style/MethodDefParentheses:
|
107
|
-
Enabled: true
|
108
|
-
|
109
|
-
Style/FrozenStringLiteralComment:
|
110
|
-
Enabled: true
|
111
|
-
EnforcedStyle: always
|
112
|
-
Exclude:
|
113
|
-
- 'actionview/test/**/*.builder'
|
114
|
-
- 'actionview/test/**/*.ruby'
|
115
|
-
- 'actionpack/test/**/*.builder'
|
116
|
-
- 'actionpack/test/**/*.ruby'
|
117
|
-
- 'activestorage/db/migrate/**/*.rb'
|
118
|
-
- 'db/migrate/**/*.rb'
|
119
|
-
- 'db/*.rb'
|
120
|
-
|
121
|
-
# Use `foo {}` not `foo{}`.
|
122
|
-
Layout/SpaceBeforeBlockBraces:
|
123
|
-
Enabled: true
|
124
|
-
|
125
|
-
# Use `foo { bar }` not `foo {bar}`.
|
126
|
-
Layout/SpaceInsideBlockBraces:
|
127
|
-
Enabled: true
|
128
|
-
|
129
|
-
# Use `{ a: 1 }` not `{a:1}`.
|
130
|
-
Layout/SpaceInsideHashLiteralBraces:
|
131
|
-
Enabled: true
|
132
|
-
|
133
|
-
Layout/SpaceInsideParens:
|
134
|
-
Enabled: true
|
135
|
-
|
136
|
-
# Check quotes usage according to lint rule below.
|
137
17
|
Style/StringLiterals:
|
138
18
|
Enabled: true
|
139
19
|
EnforcedStyle: double_quotes
|
140
|
-
|
141
|
-
# Detect hard tabs, no hard tabs.
|
142
|
-
Layout/Tab:
|
143
|
-
Enabled: true
|
144
|
-
|
145
|
-
# Blank lines should not have any spaces.
|
146
|
-
Layout/TrailingBlankLines:
|
147
|
-
Enabled: true
|
148
|
-
|
149
|
-
# No trailing whitespace.
|
150
|
-
Layout/TrailingWhitespace:
|
151
|
-
Enabled: true
|
152
|
-
|
153
|
-
# Use quotes for string literals when they are enough.
|
154
|
-
Style/UnneededPercentQ:
|
155
|
-
Enabled: true
|
156
|
-
|
157
|
-
# Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
|
158
|
-
Lint/RequireParentheses:
|
159
|
-
Enabled: true
|
160
|
-
|
161
|
-
Lint/StringConversionInInterpolation:
|
162
|
-
Enabled: true
|
163
|
-
|
164
|
-
Style/RedundantReturn:
|
165
|
-
Enabled: true
|
166
|
-
AllowMultipleReturnValues: true
|
167
|
-
|
168
|
-
Style/Semicolon:
|
169
|
-
Enabled: true
|
170
|
-
AllowAsExpressionSeparator: true
|
171
|
-
|
172
|
-
# Prefer Foo.method over Foo::method
|
173
|
-
Style/ColonMethodCall:
|
174
|
-
Enabled: true
|
data/.travis.yml
CHANGED
@@ -1,16 +1,26 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
os: linux
|
2
|
+
dist: xenial
|
3
3
|
language: ruby
|
4
4
|
cache: bundler
|
5
|
+
before_install:
|
6
|
+
- gem install bundler
|
5
7
|
env:
|
6
8
|
- "TEST_GROUP=1"
|
7
9
|
- "TEST_GROUP=2"
|
8
10
|
- "TEST_GROUP=3"
|
9
11
|
- "TEST_GROUP=4"
|
10
12
|
rvm:
|
11
|
-
- 2.3
|
12
13
|
- 2.5
|
13
14
|
- 2.6
|
14
|
-
|
15
|
-
|
16
|
-
-
|
15
|
+
- 2.7
|
16
|
+
stages:
|
17
|
+
- lint
|
18
|
+
- test
|
19
|
+
script: bundle exec parallel_rspec spec/ -n 4 --only-group $TEST_GROUP --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
|
20
|
+
jobs:
|
21
|
+
fast_finish: true
|
22
|
+
include:
|
23
|
+
- stage: lint
|
24
|
+
rvm: 2.7
|
25
|
+
env: "TEST_GROUP=none"
|
26
|
+
script: bundle exec rubocop
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# CrawlerDetect major changes
|
2
|
+
|
3
|
+
This changelog **does not contain** raw data updates
|
4
|
+
but only major changes.
|
5
|
+
|
6
|
+
1.0.0
|
7
|
+
---------
|
8
|
+
- Use raw JSON files instead of copying them to rb [#8]
|
9
|
+
- Add CrawlerDetect::Config to make it possible to use your own raw files [#8]
|
10
|
+
- Add bin/update_raw_files to update raw files from PHP lib [#8]
|
11
|
+
- Add Changelog
|
12
|
+
|
13
|
+
0.1.11
|
14
|
+
---------
|
15
|
+
- Add thread safety [#19]
|
16
|
+
|
17
|
+
0.1.6
|
18
|
+
---------
|
19
|
+
- Strip crawler name [#10]
|
20
|
+
|
21
|
+
0.1.2
|
22
|
+
---------
|
23
|
+
- Add parallel tests [#2]
|
24
|
+
|
25
|
+
0.1.1
|
26
|
+
---------
|
27
|
+
- Fix: rack request
|
28
|
+
|
29
|
+
0.1.0
|
30
|
+
---------
|
31
|
+
- init
|
32
|
+
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
crawler_detect (1.0.2)
|
5
|
+
qonfig (~> 0.24)
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
activesupport (6.0.3.1)
|
11
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
12
|
+
i18n (>= 0.7, < 2)
|
13
|
+
minitest (~> 5.1)
|
14
|
+
tzinfo (~> 1.1)
|
15
|
+
zeitwerk (~> 2.2, >= 2.2.2)
|
16
|
+
armitage-rubocop (0.82.0)
|
17
|
+
rubocop (= 0.82.0)
|
18
|
+
rubocop-performance (= 1.5.2)
|
19
|
+
rubocop-rails (= 2.5.2)
|
20
|
+
rubocop-rake (= 0.5.1)
|
21
|
+
rubocop-rspec (= 1.38.1)
|
22
|
+
ast (2.4.0)
|
23
|
+
awesome_print (1.8.0)
|
24
|
+
byebug (11.1.2)
|
25
|
+
coderay (1.1.2)
|
26
|
+
concurrent-ruby (1.1.6)
|
27
|
+
diff-lcs (1.3)
|
28
|
+
fuubar (2.5.0)
|
29
|
+
rspec-core (~> 3.0)
|
30
|
+
ruby-progressbar (~> 1.4)
|
31
|
+
i18n (1.8.2)
|
32
|
+
concurrent-ruby (~> 1.0)
|
33
|
+
jaro_winkler (1.5.4)
|
34
|
+
method_source (1.0.0)
|
35
|
+
minitest (5.14.1)
|
36
|
+
parallel (1.19.1)
|
37
|
+
parallel_tests (2.32.0)
|
38
|
+
parallel
|
39
|
+
parser (2.7.1.2)
|
40
|
+
ast (~> 2.4.0)
|
41
|
+
pry (0.13.1)
|
42
|
+
coderay (~> 1.1)
|
43
|
+
method_source (~> 1.0)
|
44
|
+
pry-byebug (3.9.0)
|
45
|
+
byebug (~> 11.0)
|
46
|
+
pry (~> 0.13.0)
|
47
|
+
pry-meta (0.0.10)
|
48
|
+
awesome_print
|
49
|
+
pry
|
50
|
+
pry-byebug
|
51
|
+
pry-remote
|
52
|
+
pry-remote (0.1.8)
|
53
|
+
pry (~> 0.9)
|
54
|
+
slop (~> 3.0)
|
55
|
+
qonfig (0.24.1)
|
56
|
+
rack (2.2.3)
|
57
|
+
rack-test (1.1.0)
|
58
|
+
rack (>= 1.0, < 3)
|
59
|
+
rainbow (3.0.0)
|
60
|
+
rake (13.0.1)
|
61
|
+
rexml (3.2.4)
|
62
|
+
rspec (3.9.0)
|
63
|
+
rspec-core (~> 3.9.0)
|
64
|
+
rspec-expectations (~> 3.9.0)
|
65
|
+
rspec-mocks (~> 3.9.0)
|
66
|
+
rspec-core (3.9.1)
|
67
|
+
rspec-support (~> 3.9.1)
|
68
|
+
rspec-expectations (3.9.1)
|
69
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
70
|
+
rspec-support (~> 3.9.0)
|
71
|
+
rspec-mocks (3.9.1)
|
72
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
73
|
+
rspec-support (~> 3.9.0)
|
74
|
+
rspec-support (3.9.2)
|
75
|
+
rubocop (0.82.0)
|
76
|
+
jaro_winkler (~> 1.5.1)
|
77
|
+
parallel (~> 1.10)
|
78
|
+
parser (>= 2.7.0.1)
|
79
|
+
rainbow (>= 2.2.2, < 4.0)
|
80
|
+
rexml
|
81
|
+
ruby-progressbar (~> 1.7)
|
82
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
83
|
+
rubocop-performance (1.5.2)
|
84
|
+
rubocop (>= 0.71.0)
|
85
|
+
rubocop-rails (2.5.2)
|
86
|
+
activesupport
|
87
|
+
rack (>= 1.1)
|
88
|
+
rubocop (>= 0.72.0)
|
89
|
+
rubocop-rake (0.5.1)
|
90
|
+
rubocop
|
91
|
+
rubocop-rspec (1.38.1)
|
92
|
+
rubocop (>= 0.68.1)
|
93
|
+
ruby-progressbar (1.10.1)
|
94
|
+
slop (3.6.0)
|
95
|
+
thread_safe (0.3.6)
|
96
|
+
tzinfo (1.2.7)
|
97
|
+
thread_safe (~> 0.1)
|
98
|
+
unicode-display_width (1.7.0)
|
99
|
+
zeitwerk (2.3.0)
|
100
|
+
|
101
|
+
PLATFORMS
|
102
|
+
ruby
|
103
|
+
|
104
|
+
DEPENDENCIES
|
105
|
+
activesupport (~> 6.0.3)
|
106
|
+
armitage-rubocop (= 0.82)
|
107
|
+
bundler (>= 1.15)
|
108
|
+
crawler_detect!
|
109
|
+
fuubar (~> 2.0)
|
110
|
+
parallel_tests (~> 2.0)
|
111
|
+
pry-meta (~> 0.0.10)
|
112
|
+
rack-test (~> 1.1)
|
113
|
+
rake (>= 10.0)
|
114
|
+
rspec (~> 3.0)
|
115
|
+
|
116
|
+
BUNDLED WITH
|
117
|
+
2.1.4
|
data/README.md
CHANGED
@@ -54,5 +54,18 @@ end
|
|
54
54
|
```
|
55
55
|
use Rack::CrawlerDetect
|
56
56
|
```
|
57
|
+
## Configuration
|
58
|
+
In some cases you may want to use your own whitelist, blacklist, or list of HTTP headers to detect the User-Agent.
|
59
|
+
|
60
|
+
This can be done via `CrawlerDetect::Config`. For example, you may have an initializer like this:
|
61
|
+
```
|
62
|
+
CrawlerDetect.setup! do |config|
|
63
|
+
config.raw_headers_path = File.expand_path("crawlers/MyHeaders.json", __dir__)
|
64
|
+
config.raw_crawlers_path = File.expand_path("crawlers/MyCrawlers.json", __dir__)
|
65
|
+
config.raw_exclusions_path = File.expand_path("crawlers/MyExclusions.json", __dir__)
|
66
|
+
end
|
67
|
+
```
|
68
|
+
Make sure that your files are correct JSON files.
|
69
|
+
Look at [the raw files](https://github.com/loadkpi/crawler_detect/tree/master/lib/crawler_detect/library/raw) which are used by default for more information.
|
57
70
|
## License
|
58
71
|
MIT License
|
data/bin/update_raw_files
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
#!/bin/bash
|
2
|
+
|
3
|
+
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
4
|
+
|
5
|
+
wget -O $DIR/../lib/crawler_detect/library/raw/Crawlers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.json
|
6
|
+
wget -O $DIR/../lib/crawler_detect/library/raw/Exclusions.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Exclusions.json
|
7
|
+
wget -O $DIR/../lib/crawler_detect/library/raw/Headers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Headers.json
|
8
|
+
|
9
|
+
wget -O $DIR/../spec/fixtures/crawlers.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/crawlers.txt
|
10
|
+
wget -O $DIR/../spec/fixtures/devices.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/devices.txt
|
data/crawler_detect.gemspec
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
lib = File.expand_path("
|
3
|
+
lib = File.expand_path("lib", __dir__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
require "crawler_detect/version"
|
6
6
|
|
@@ -17,14 +17,16 @@ Gem::Specification.new do |spec|
|
|
17
17
|
|
18
18
|
# Specify which files should be added to the gem when it is released.
|
19
19
|
# The `git ls-files -z` loads the files in the RubyGem that have been added into git.
|
20
|
-
spec.files = Dir.chdir(File.expand_path(
|
20
|
+
spec.files = Dir.chdir(File.expand_path(__dir__)) do
|
21
21
|
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
22
22
|
end
|
23
23
|
spec.bindir = "exe"
|
24
24
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
25
25
|
spec.require_paths = ["lib"]
|
26
26
|
|
27
|
-
spec.
|
27
|
+
spec.add_dependency "qonfig", "~> 0.24"
|
28
|
+
|
29
|
+
spec.add_development_dependency "activesupport", "~> 6.0.3"
|
28
30
|
spec.add_development_dependency "bundler", ">= 1.15"
|
29
31
|
spec.add_development_dependency "fuubar", "~> 2.0"
|
30
32
|
spec.add_development_dependency "parallel_tests", "~> 2.0"
|
@@ -32,4 +34,5 @@ Gem::Specification.new do |spec|
|
|
32
34
|
spec.add_development_dependency "rack-test", "~> 1.1"
|
33
35
|
spec.add_development_dependency "rake", ">= 10.0"
|
34
36
|
spec.add_development_dependency "rspec", "~> 3.0"
|
37
|
+
spec.add_development_dependency "armitage-rubocop", "0.82"
|
35
38
|
end
|