crawler_detect 0.1.12 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6e03716868421dc64e5acb715b35599776fbe3951bbeb7e462892ed3a6a51d6
4
- data.tar.gz: 10b00a68949482af5cb6064f538e0829f2bcb836fe5605da48b48f0af7bbad71
3
+ metadata.gz: 8bdca229af61e7f9a0c2cd3b417db317d0ef8a4b6635137e7c5b7c6d8244c5e7
4
+ data.tar.gz: 959bf5817c54bf37c68c2555acf1a880d0ed4fdfab5e6a1d5678a68badd80557
5
5
  SHA512:
6
- metadata.gz: cb66a7c310ca791038ca199c68626d522f99cb4ddff57382b07ecb91b824e44292362351ccb71622051ece1d8c7a900019627fed731e46f5474507b8b20f6aaa
7
- data.tar.gz: 3f4c2484f4155d975915dd21e1c62d6f8dd19ddda4023cff9530701cf628ddf94c10f5e097c3cb85ffbf3d71686bd91d9b9ac71da59cf47933549fd847d2c350
6
+ metadata.gz: f69c6e4306bd0e09dc632d1c8f7f626a7bb4cc858ff85c17eb7761466eac7a9161dc79976b66ac6817e7169c2358d662c77a9c98b51802eeb37e597b7c40a9d7
7
+ data.tar.gz: 4798889b93af2b6441f632bc53de96cbcfc4a1d5f4242eb07fb95265d4359cbf304a860eb263e361022b92416bf2cbfef83d50f10f41f69264be659dc02578ac
data/.rubocop.yml CHANGED
@@ -1,174 +1,19 @@
1
- AllCops:
2
- TargetRubyVersion: 2.2
3
- # RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
4
- # to ignore them, so only the ones explicitly set in this file are enabled.
5
- DisabledByDefault: true
6
- Exclude:
7
- - '**/templates/**/*'
8
- - '**/vendor/**/*'
9
- - '**/vendor/**/.*'
10
- - '**/node_modules/**/*'
11
- - 'actionpack/lib/action_dispatch/journey/parser.rb'
1
+ inherit_gem:
2
+ armitage-rubocop:
3
+ - lib/rubocop.general.yml
4
+ - lib/rubocop.rake.yml
5
+ - lib/rubocop.rspec.yml
12
6
 
13
- # Prefer assert_not_x over refute_x
14
- CustomCops/RefuteNot:
7
+ AllCops:
8
+ TargetRubyVersion: 2.7.1
15
9
  Include:
16
- - '**/test/**/*'
17
-
18
- # Prefer &&/|| over and/or.
19
- Style/AndOr:
20
- Enabled: true
21
-
22
- # Do not use braces for hash literals when they are the last argument of a
23
- # method call.
24
- Style/BracesAroundHashParameters:
25
- Enabled: true
26
- EnforcedStyle: context_dependent
27
-
28
- # Align `when` with `case`.
29
- Layout/CaseIndentation:
30
- Enabled: true
31
-
32
- # Align comments with method definitions.
33
- Layout/CommentIndentation:
34
- Enabled: true
35
-
36
- Layout/ElseAlignment:
37
- Enabled: true
38
-
39
- # Align `end` with the matching keyword or starting expression except for
40
- # assignments, where it should be aligned with the LHS.
41
- Layout/EndAlignment:
42
- Enabled: true
43
- EnforcedStyleAlignWith: variable
44
- AutoCorrect: true
45
-
46
- Layout/EmptyLineAfterMagicComment:
47
- Enabled: true
48
-
49
- # In a regular class definition, no empty lines around the body.
50
- Layout/EmptyLinesAroundClassBody:
51
- Enabled: true
52
-
53
- # In a regular method definition, no empty lines around the body.
54
- Layout/EmptyLinesAroundMethodBody:
55
- Enabled: true
10
+ - lib/**/*.rb
11
+ - spec/**/*.rb
12
+ - Gemfile
13
+ - Rakefile
14
+ - crawler_detect.gemspec
15
+ - bin/console
56
16
 
57
- # In a regular module definition, no empty lines around the body.
58
- Layout/EmptyLinesAroundModuleBody:
59
- Enabled: true
60
-
61
- Layout/FirstParameterIndentation:
62
- Enabled: true
63
-
64
- # Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
65
- Style/HashSyntax:
66
- Enabled: true
67
-
68
- # Method definitions after `private` or `protected` isolated calls need one
69
- # extra level of indentation.
70
- Layout/IndentationConsistency:
71
- Enabled: true
72
- EnforcedStyle: rails
73
-
74
- # Two spaces, no tabs (for indentation).
75
- Layout/IndentationWidth:
76
- Enabled: true
77
-
78
- Layout/LeadingCommentSpace:
79
- Enabled: true
80
-
81
- Layout/SpaceAfterColon:
82
- Enabled: true
83
-
84
- Layout/SpaceAfterComma:
85
- Enabled: true
86
-
87
- Layout/SpaceAroundEqualsInParameterDefault:
88
- Enabled: true
89
-
90
- Layout/SpaceAroundKeyword:
91
- Enabled: true
92
-
93
- Layout/SpaceAroundOperators:
94
- Enabled: true
95
-
96
- Layout/SpaceBeforeComma:
97
- Enabled: true
98
-
99
- Layout/SpaceBeforeFirstArg:
100
- Enabled: true
101
-
102
- Style/DefWithParentheses:
103
- Enabled: true
104
-
105
- # Defining a method with parameters needs parentheses.
106
- Style/MethodDefParentheses:
107
- Enabled: true
108
-
109
- Style/FrozenStringLiteralComment:
110
- Enabled: true
111
- EnforcedStyle: always
112
- Exclude:
113
- - 'actionview/test/**/*.builder'
114
- - 'actionview/test/**/*.ruby'
115
- - 'actionpack/test/**/*.builder'
116
- - 'actionpack/test/**/*.ruby'
117
- - 'activestorage/db/migrate/**/*.rb'
118
- - 'db/migrate/**/*.rb'
119
- - 'db/*.rb'
120
-
121
- # Use `foo {}` not `foo{}`.
122
- Layout/SpaceBeforeBlockBraces:
123
- Enabled: true
124
-
125
- # Use `foo { bar }` not `foo {bar}`.
126
- Layout/SpaceInsideBlockBraces:
127
- Enabled: true
128
-
129
- # Use `{ a: 1 }` not `{a:1}`.
130
- Layout/SpaceInsideHashLiteralBraces:
131
- Enabled: true
132
-
133
- Layout/SpaceInsideParens:
134
- Enabled: true
135
-
136
- # Check quotes usage according to lint rule below.
137
17
  Style/StringLiterals:
138
18
  Enabled: true
139
19
  EnforcedStyle: double_quotes
140
-
141
- # Detect hard tabs, no hard tabs.
142
- Layout/Tab:
143
- Enabled: true
144
-
145
- # Blank lines should not have any spaces.
146
- Layout/TrailingBlankLines:
147
- Enabled: true
148
-
149
- # No trailing whitespace.
150
- Layout/TrailingWhitespace:
151
- Enabled: true
152
-
153
- # Use quotes for string literals when they are enough.
154
- Style/UnneededPercentQ:
155
- Enabled: true
156
-
157
- # Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
158
- Lint/RequireParentheses:
159
- Enabled: true
160
-
161
- Lint/StringConversionInInterpolation:
162
- Enabled: true
163
-
164
- Style/RedundantReturn:
165
- Enabled: true
166
- AllowMultipleReturnValues: true
167
-
168
- Style/Semicolon:
169
- Enabled: true
170
- AllowAsExpressionSeparator: true
171
-
172
- # Prefer Foo.method over Foo::method
173
- Style/ColonMethodCall:
174
- Enabled: true
data/.travis.yml CHANGED
@@ -1,7 +1,9 @@
1
- ---
2
- sudo: false
1
+ os: linux
2
+ dist: xenial
3
3
  language: ruby
4
4
  cache: bundler
5
+ before_install:
6
+ - gem install bundler
5
7
  env:
6
8
  - "TEST_GROUP=1"
7
9
  - "TEST_GROUP=2"
@@ -11,6 +13,14 @@ rvm:
11
13
  - 2.5
12
14
  - 2.6
13
15
  - 2.7
14
- before_install: gem install bundler
15
- script:
16
- - bundle exec parallel_rspec spec/ -n 4 --only-group $TEST_GROUP --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
16
+ stages:
17
+ - lint
18
+ - test
19
+ script: bundle exec parallel_rspec spec/ -n 4 --only-group $TEST_GROUP --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
20
+ jobs:
21
+ fast_finish: true
22
+ include:
23
+ - stage: lint
24
+ rvm: 2.7
25
+ env: "TEST_GROUP=none"
26
+ script: bundle exec rubocop
data/CHANGELOG.md ADDED
@@ -0,0 +1,32 @@
1
+ # CrawlerDetect major changes
2
+
3
+ This changelog **does not contain** raw data updates
4
+ but only major changes.
5
+
6
+ 1.0.0
7
+ ---------
8
+ - Use raw JSON files instead of copying them to rb [#8]
9
+ - Add CrawlerDetect::Config to make it possible to use your own raw files [#8]
10
+ - Add bin/update_raw_files to update raw files from PHP lib [#8]
11
+ - Add Changelog
12
+
13
+ 0.1.11
14
+ ---------
15
+ - Add thread safety [#19]
16
+
17
+ 0.1.6
18
+ ---------
19
+ - Strip crawler name [#10]
20
+
21
+ 0.1.2
22
+ ---------
23
+ - Add parallel tests [#2]
24
+
25
+ 0.1.1
26
+ ---------
27
+ - Fix: rack request
28
+
29
+ 0.1.0
30
+ ---------
31
+ - init
32
+
data/Gemfile.lock CHANGED
@@ -1,7 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- crawler_detect (0.1.11)
4
+ crawler_detect (1.0.0)
5
+ oj (>= 3.0)
6
+ qonfig (~> 0.24)
5
7
 
6
8
  GEM
7
9
  remote: https://rubygems.org/
@@ -11,6 +13,13 @@ GEM
11
13
  i18n (>= 0.7, < 2)
12
14
  minitest (~> 5.1)
13
15
  tzinfo (~> 1.1)
16
+ armitage-rubocop (0.82.0)
17
+ rubocop (= 0.82.0)
18
+ rubocop-performance (= 1.5.2)
19
+ rubocop-rails (= 2.5.2)
20
+ rubocop-rake (= 0.5.1)
21
+ rubocop-rspec (= 1.38.1)
22
+ ast (2.4.0)
14
23
  awesome_print (1.8.0)
15
24
  byebug (11.1.2)
16
25
  coderay (1.1.2)
@@ -21,11 +30,15 @@ GEM
21
30
  ruby-progressbar (~> 1.4)
22
31
  i18n (1.8.2)
23
32
  concurrent-ruby (~> 1.0)
33
+ jaro_winkler (1.5.4)
24
34
  method_source (1.0.0)
25
35
  minitest (5.14.0)
36
+ oj (3.10.6)
26
37
  parallel (1.19.1)
27
38
  parallel_tests (2.32.0)
28
39
  parallel
40
+ parser (2.7.1.2)
41
+ ast (~> 2.4.0)
29
42
  pry (0.13.1)
30
43
  coderay (~> 1.1)
31
44
  method_source (~> 1.0)
@@ -40,10 +53,13 @@ GEM
40
53
  pry-remote (0.1.8)
41
54
  pry (~> 0.9)
42
55
  slop (~> 3.0)
56
+ qonfig (0.24.1)
43
57
  rack (2.2.2)
44
58
  rack-test (1.1.0)
45
59
  rack (>= 1.0, < 3)
60
+ rainbow (3.0.0)
46
61
  rake (13.0.1)
62
+ rexml (3.2.4)
47
63
  rspec (3.9.0)
48
64
  rspec-core (~> 3.9.0)
49
65
  rspec-expectations (~> 3.9.0)
@@ -57,17 +73,37 @@ GEM
57
73
  diff-lcs (>= 1.2.0, < 2.0)
58
74
  rspec-support (~> 3.9.0)
59
75
  rspec-support (3.9.2)
76
+ rubocop (0.82.0)
77
+ jaro_winkler (~> 1.5.1)
78
+ parallel (~> 1.10)
79
+ parser (>= 2.7.0.1)
80
+ rainbow (>= 2.2.2, < 4.0)
81
+ rexml
82
+ ruby-progressbar (~> 1.7)
83
+ unicode-display_width (>= 1.4.0, < 2.0)
84
+ rubocop-performance (1.5.2)
85
+ rubocop (>= 0.71.0)
86
+ rubocop-rails (2.5.2)
87
+ activesupport
88
+ rack (>= 1.1)
89
+ rubocop (>= 0.72.0)
90
+ rubocop-rake (0.5.1)
91
+ rubocop
92
+ rubocop-rspec (1.38.1)
93
+ rubocop (>= 0.68.1)
60
94
  ruby-progressbar (1.10.1)
61
95
  slop (3.6.0)
62
96
  thread_safe (0.3.6)
63
97
  tzinfo (1.2.7)
64
98
  thread_safe (~> 0.1)
99
+ unicode-display_width (1.7.0)
65
100
 
66
101
  PLATFORMS
67
102
  ruby
68
103
 
69
104
  DEPENDENCIES
70
105
  activesupport (~> 5.2.0)
106
+ armitage-rubocop (= 0.82)
71
107
  bundler (>= 1.15)
72
108
  crawler_detect!
73
109
  fuubar (~> 2.0)
@@ -78,4 +114,4 @@ DEPENDENCIES
78
114
  rspec (~> 3.0)
79
115
 
80
116
  BUNDLED WITH
81
- 2.1.3
117
+ 2.1.4
data/README.md CHANGED
@@ -54,5 +54,18 @@ end
54
54
  ```
55
55
  use Rack::CrawlerDetect
56
56
  ```
57
+ ## Configuration
58
+ In some cases you may want to use your own white-list, black-list, or list of HTTP headers to detect the User-Agent.
59
+
60
+ This is possible via `CrawlerDetect::Config`. For example, you may have an initializer like this:
61
+ ```
62
+ CrawlerDetect.setup! do |config|
63
+ config.raw_headers_path = File.expand_path("crawlers/MyHeaders.json", __dir__)
64
+ config.raw_crawlers_path = File.expand_path("crawlers/MyCrawlers.json", __dir__)
65
+ config.raw_exclusions_path = File.expand_path("crawlers/MyExclusions.json", __dir__)
66
+ end
67
+ ```
68
+ Make sure that your files are correct JSON files.
69
+ For more information, look at [the raw files](https://github.com/loadkpi/crawler_detect/tree/master/lib/crawler_detect/library/raw) that are used by default.
57
70
  ## License
58
71
  MIT License
@@ -0,0 +1,10 @@
1
+ #!/bin/bash
2
+
3
+ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
4
+
5
+ wget -O $DIR/../lib/crawler_detect/library/raw/Crawlers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.json
6
+ wget -O $DIR/../lib/crawler_detect/library/raw/Exclusions.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Exclusions.json
7
+ wget -O $DIR/../lib/crawler_detect/library/raw/Headers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Headers.json
8
+
9
+ wget -O $DIR/../spec/fixtures/crawlers.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/crawlers.txt
10
+ wget -O $DIR/../spec/fixtures/devices.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/devices.txt
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path("lib", __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "crawler_detect/version"
6
6
 
@@ -17,13 +17,16 @@ Gem::Specification.new do |spec|
17
17
 
18
18
  # Specify which files should be added to the gem when it is released.
19
19
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
- spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
23
  spec.bindir = "exe"
24
24
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ["lib"]
26
26
 
27
+ spec.add_dependency "oj", ">= 3.0"
28
+ spec.add_dependency "qonfig", "~> 0.24"
29
+
27
30
  spec.add_development_dependency "activesupport", "~> 5.2.0"
28
31
  spec.add_development_dependency "bundler", ">= 1.15"
29
32
  spec.add_development_dependency "fuubar", "~> 2.0"
@@ -32,4 +35,5 @@ Gem::Specification.new do |spec|
32
35
  spec.add_development_dependency "rack-test", "~> 1.1"
33
36
  spec.add_development_dependency "rake", ">= 10.0"
34
37
  spec.add_development_dependency "rspec", "~> 3.0"
38
+ spec.add_development_dependency "armitage-rubocop", "0.82"
35
39
  end
@@ -1,24 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "crawler_detect/detector"
4
- require "crawler_detect/library"
5
- require "crawler_detect/library/crawlers"
6
- require "crawler_detect/library/exclusions"
7
- require "crawler_detect/library/headers"
8
- require "crawler_detect/version"
3
+ require "oj"
4
+ require "qonfig"
9
5
 
10
- require "rack/crawler_detect"
6
+ require_relative "crawler_detect/config"
7
+ require_relative "crawler_detect/detector"
8
+ require_relative "crawler_detect/library/loader"
9
+ require_relative "crawler_detect/library/crawlers"
10
+ require_relative "crawler_detect/library/exclusions"
11
+ require_relative "crawler_detect/library/headers"
12
+ require_relative "crawler_detect/library"
13
+ require_relative "crawler_detect/version"
14
+ require_relative "rack/crawler_detect"
11
15
 
16
+ # @since 0.1.0
12
17
  module CrawlerDetect
13
18
  class << self
19
+ # @param user_agent [String] User-agent string to detect
20
+ # @return [CrawlerDetect::Detector] Instance of detector class
14
21
  def new(user_agent)
15
22
  detector(user_agent)
16
23
  end
17
24
 
25
+ # @param user_agent [String] User-agent string to detect
26
+ # @return [true, false] Is User-agent a crawler?
18
27
  def is_crawler?(user_agent)
19
28
  detector(user_agent).is_crawler?
20
29
  end
21
30
 
31
+ # @since 1.0.0
32
+ # @param config [Proc]
33
+ def setup!(&config)
34
+ @config = CrawlerDetect::Config.new(&config)
35
+ Library::DATA_CLASSES.each(&:reload_data)
36
+ end
37
+
38
+ # @since 1.0.0
39
+ # @return [CrawlerDetect::Config] Instance of configuration class
40
+ def config
41
+ @config ||= CrawlerDetect::Config.new
42
+ end
43
+
22
44
  private
23
45
 
24
46
  def detector(user_agent)