crawler_detect 0.1.12 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f6e03716868421dc64e5acb715b35599776fbe3951bbeb7e462892ed3a6a51d6
4
- data.tar.gz: 10b00a68949482af5cb6064f538e0829f2bcb836fe5605da48b48f0af7bbad71
3
+ metadata.gz: 8bdca229af61e7f9a0c2cd3b417db317d0ef8a4b6635137e7c5b7c6d8244c5e7
4
+ data.tar.gz: 959bf5817c54bf37c68c2555acf1a880d0ed4fdfab5e6a1d5678a68badd80557
5
5
  SHA512:
6
- metadata.gz: cb66a7c310ca791038ca199c68626d522f99cb4ddff57382b07ecb91b824e44292362351ccb71622051ece1d8c7a900019627fed731e46f5474507b8b20f6aaa
7
- data.tar.gz: 3f4c2484f4155d975915dd21e1c62d6f8dd19ddda4023cff9530701cf628ddf94c10f5e097c3cb85ffbf3d71686bd91d9b9ac71da59cf47933549fd847d2c350
6
+ metadata.gz: f69c6e4306bd0e09dc632d1c8f7f626a7bb4cc858ff85c17eb7761466eac7a9161dc79976b66ac6817e7169c2358d662c77a9c98b51802eeb37e597b7c40a9d7
7
+ data.tar.gz: 4798889b93af2b6441f632bc53de96cbcfc4a1d5f4242eb07fb95265d4359cbf304a860eb263e361022b92416bf2cbfef83d50f10f41f69264be659dc02578ac
data/.rubocop.yml CHANGED
@@ -1,174 +1,19 @@
1
- AllCops:
2
- TargetRubyVersion: 2.2
3
- # RuboCop has a bunch of cops enabled by default. This setting tells RuboCop
4
- # to ignore them, so only the ones explicitly set in this file are enabled.
5
- DisabledByDefault: true
6
- Exclude:
7
- - '**/templates/**/*'
8
- - '**/vendor/**/*'
9
- - '**/vendor/**/.*'
10
- - '**/node_modules/**/*'
11
- - 'actionpack/lib/action_dispatch/journey/parser.rb'
1
+ inherit_gem:
2
+ armitage-rubocop:
3
+ - lib/rubocop.general.yml
4
+ - lib/rubocop.rake.yml
5
+ - lib/rubocop.rspec.yml
12
6
 
13
- # Prefer assert_not_x over refute_x
14
- CustomCops/RefuteNot:
7
+ AllCops:
8
+ TargetRubyVersion: 2.7.1
15
9
  Include:
16
- - '**/test/**/*'
17
-
18
- # Prefer &&/|| over and/or.
19
- Style/AndOr:
20
- Enabled: true
21
-
22
- # Do not use braces for hash literals when they are the last argument of a
23
- # method call.
24
- Style/BracesAroundHashParameters:
25
- Enabled: true
26
- EnforcedStyle: context_dependent
27
-
28
- # Align `when` with `case`.
29
- Layout/CaseIndentation:
30
- Enabled: true
31
-
32
- # Align comments with method definitions.
33
- Layout/CommentIndentation:
34
- Enabled: true
35
-
36
- Layout/ElseAlignment:
37
- Enabled: true
38
-
39
- # Align `end` with the matching keyword or starting expression except for
40
- # assignments, where it should be aligned with the LHS.
41
- Layout/EndAlignment:
42
- Enabled: true
43
- EnforcedStyleAlignWith: variable
44
- AutoCorrect: true
45
-
46
- Layout/EmptyLineAfterMagicComment:
47
- Enabled: true
48
-
49
- # In a regular class definition, no empty lines around the body.
50
- Layout/EmptyLinesAroundClassBody:
51
- Enabled: true
52
-
53
- # In a regular method definition, no empty lines around the body.
54
- Layout/EmptyLinesAroundMethodBody:
55
- Enabled: true
10
+ - lib/**/*.rb
11
+ - spec/**/*.rb
12
+ - Gemfile
13
+ - Rakefile
14
+ - crawler_detect.gemspec
15
+ - bin/console
56
16
 
57
- # In a regular module definition, no empty lines around the body.
58
- Layout/EmptyLinesAroundModuleBody:
59
- Enabled: true
60
-
61
- Layout/FirstParameterIndentation:
62
- Enabled: true
63
-
64
- # Use Ruby >= 1.9 syntax for hashes. Prefer { a: :b } over { :a => :b }.
65
- Style/HashSyntax:
66
- Enabled: true
67
-
68
- # Method definitions after `private` or `protected` isolated calls need one
69
- # extra level of indentation.
70
- Layout/IndentationConsistency:
71
- Enabled: true
72
- EnforcedStyle: rails
73
-
74
- # Two spaces, no tabs (for indentation).
75
- Layout/IndentationWidth:
76
- Enabled: true
77
-
78
- Layout/LeadingCommentSpace:
79
- Enabled: true
80
-
81
- Layout/SpaceAfterColon:
82
- Enabled: true
83
-
84
- Layout/SpaceAfterComma:
85
- Enabled: true
86
-
87
- Layout/SpaceAroundEqualsInParameterDefault:
88
- Enabled: true
89
-
90
- Layout/SpaceAroundKeyword:
91
- Enabled: true
92
-
93
- Layout/SpaceAroundOperators:
94
- Enabled: true
95
-
96
- Layout/SpaceBeforeComma:
97
- Enabled: true
98
-
99
- Layout/SpaceBeforeFirstArg:
100
- Enabled: true
101
-
102
- Style/DefWithParentheses:
103
- Enabled: true
104
-
105
- # Defining a method with parameters needs parentheses.
106
- Style/MethodDefParentheses:
107
- Enabled: true
108
-
109
- Style/FrozenStringLiteralComment:
110
- Enabled: true
111
- EnforcedStyle: always
112
- Exclude:
113
- - 'actionview/test/**/*.builder'
114
- - 'actionview/test/**/*.ruby'
115
- - 'actionpack/test/**/*.builder'
116
- - 'actionpack/test/**/*.ruby'
117
- - 'activestorage/db/migrate/**/*.rb'
118
- - 'db/migrate/**/*.rb'
119
- - 'db/*.rb'
120
-
121
- # Use `foo {}` not `foo{}`.
122
- Layout/SpaceBeforeBlockBraces:
123
- Enabled: true
124
-
125
- # Use `foo { bar }` not `foo {bar}`.
126
- Layout/SpaceInsideBlockBraces:
127
- Enabled: true
128
-
129
- # Use `{ a: 1 }` not `{a:1}`.
130
- Layout/SpaceInsideHashLiteralBraces:
131
- Enabled: true
132
-
133
- Layout/SpaceInsideParens:
134
- Enabled: true
135
-
136
- # Check quotes usage according to lint rule below.
137
17
  Style/StringLiterals:
138
18
  Enabled: true
139
19
  EnforcedStyle: double_quotes
140
-
141
- # Detect hard tabs, no hard tabs.
142
- Layout/Tab:
143
- Enabled: true
144
-
145
- # Blank lines should not have any spaces.
146
- Layout/TrailingBlankLines:
147
- Enabled: true
148
-
149
- # No trailing whitespace.
150
- Layout/TrailingWhitespace:
151
- Enabled: true
152
-
153
- # Use quotes for string literals when they are enough.
154
- Style/UnneededPercentQ:
155
- Enabled: true
156
-
157
- # Use my_method(my_arg) not my_method( my_arg ) or my_method my_arg.
158
- Lint/RequireParentheses:
159
- Enabled: true
160
-
161
- Lint/StringConversionInInterpolation:
162
- Enabled: true
163
-
164
- Style/RedundantReturn:
165
- Enabled: true
166
- AllowMultipleReturnValues: true
167
-
168
- Style/Semicolon:
169
- Enabled: true
170
- AllowAsExpressionSeparator: true
171
-
172
- # Prefer Foo.method over Foo::method
173
- Style/ColonMethodCall:
174
- Enabled: true
data/.travis.yml CHANGED
@@ -1,7 +1,9 @@
1
- ---
2
- sudo: false
1
+ os: linux
2
+ dist: xenial
3
3
  language: ruby
4
4
  cache: bundler
5
+ before_install:
6
+ - gem install bundler
5
7
  env:
6
8
  - "TEST_GROUP=1"
7
9
  - "TEST_GROUP=2"
@@ -11,6 +13,14 @@ rvm:
11
13
  - 2.5
12
14
  - 2.6
13
15
  - 2.7
14
- before_install: gem install bundler
15
- script:
16
- - bundle exec parallel_rspec spec/ -n 4 --only-group $TEST_GROUP --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
16
+ stages:
17
+ - lint
18
+ - test
19
+ script: bundle exec parallel_rspec spec/ -n 4 --only-group $TEST_GROUP --group-by runtime --runtime-log spec/fixtures/parallel_runtime_rspec.log
20
+ jobs:
21
+ fast_finish: true
22
+ include:
23
+ - stage: lint
24
+ rvm: 2.7
25
+ env: "TEST_GROUP=none"
26
+ script: bundle exec rubocop
data/CHANGELOG.md ADDED
@@ -0,0 +1,32 @@
1
+ # CrawlerDetect major changes
2
+
3
+ This changelog **does not contain** raw data updates;
4
+ it lists only major changes.
5
+
6
+ 1.0.0
7
+ ---------
8
+ - Use raw JSON files instead of copying them to rb [#8]
9
+ - Add CrawlerDetect::Config to make it possible to use your own raw files [#8]
10
+ - Add bin/update_raw_files to update raw files from PHP lib [#8]
11
+ - Add Changelog
12
+
13
+ 0.1.11
14
+ ---------
15
+ - Add thread safety [#19]
16
+
17
+ 0.1.6
18
+ ---------
19
+ - Strip crawler name [#10]
20
+
21
+ 0.1.2
22
+ ---------
23
+ - Add parallel tests [#2]
24
+
25
+ 0.1.1
26
+ ---------
27
+ - Fix: rack request
28
+
29
+ 0.1.0
30
+ ---------
31
+ - init
32
+
data/Gemfile.lock CHANGED
@@ -1,7 +1,9 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- crawler_detect (0.1.11)
4
+ crawler_detect (1.0.0)
5
+ oj (>= 3.0)
6
+ qonfig (~> 0.24)
5
7
 
6
8
  GEM
7
9
  remote: https://rubygems.org/
@@ -11,6 +13,13 @@ GEM
11
13
  i18n (>= 0.7, < 2)
12
14
  minitest (~> 5.1)
13
15
  tzinfo (~> 1.1)
16
+ armitage-rubocop (0.82.0)
17
+ rubocop (= 0.82.0)
18
+ rubocop-performance (= 1.5.2)
19
+ rubocop-rails (= 2.5.2)
20
+ rubocop-rake (= 0.5.1)
21
+ rubocop-rspec (= 1.38.1)
22
+ ast (2.4.0)
14
23
  awesome_print (1.8.0)
15
24
  byebug (11.1.2)
16
25
  coderay (1.1.2)
@@ -21,11 +30,15 @@ GEM
21
30
  ruby-progressbar (~> 1.4)
22
31
  i18n (1.8.2)
23
32
  concurrent-ruby (~> 1.0)
33
+ jaro_winkler (1.5.4)
24
34
  method_source (1.0.0)
25
35
  minitest (5.14.0)
36
+ oj (3.10.6)
26
37
  parallel (1.19.1)
27
38
  parallel_tests (2.32.0)
28
39
  parallel
40
+ parser (2.7.1.2)
41
+ ast (~> 2.4.0)
29
42
  pry (0.13.1)
30
43
  coderay (~> 1.1)
31
44
  method_source (~> 1.0)
@@ -40,10 +53,13 @@ GEM
40
53
  pry-remote (0.1.8)
41
54
  pry (~> 0.9)
42
55
  slop (~> 3.0)
56
+ qonfig (0.24.1)
43
57
  rack (2.2.2)
44
58
  rack-test (1.1.0)
45
59
  rack (>= 1.0, < 3)
60
+ rainbow (3.0.0)
46
61
  rake (13.0.1)
62
+ rexml (3.2.4)
47
63
  rspec (3.9.0)
48
64
  rspec-core (~> 3.9.0)
49
65
  rspec-expectations (~> 3.9.0)
@@ -57,17 +73,37 @@ GEM
57
73
  diff-lcs (>= 1.2.0, < 2.0)
58
74
  rspec-support (~> 3.9.0)
59
75
  rspec-support (3.9.2)
76
+ rubocop (0.82.0)
77
+ jaro_winkler (~> 1.5.1)
78
+ parallel (~> 1.10)
79
+ parser (>= 2.7.0.1)
80
+ rainbow (>= 2.2.2, < 4.0)
81
+ rexml
82
+ ruby-progressbar (~> 1.7)
83
+ unicode-display_width (>= 1.4.0, < 2.0)
84
+ rubocop-performance (1.5.2)
85
+ rubocop (>= 0.71.0)
86
+ rubocop-rails (2.5.2)
87
+ activesupport
88
+ rack (>= 1.1)
89
+ rubocop (>= 0.72.0)
90
+ rubocop-rake (0.5.1)
91
+ rubocop
92
+ rubocop-rspec (1.38.1)
93
+ rubocop (>= 0.68.1)
60
94
  ruby-progressbar (1.10.1)
61
95
  slop (3.6.0)
62
96
  thread_safe (0.3.6)
63
97
  tzinfo (1.2.7)
64
98
  thread_safe (~> 0.1)
99
+ unicode-display_width (1.7.0)
65
100
 
66
101
  PLATFORMS
67
102
  ruby
68
103
 
69
104
  DEPENDENCIES
70
105
  activesupport (~> 5.2.0)
106
+ armitage-rubocop (= 0.82)
71
107
  bundler (>= 1.15)
72
108
  crawler_detect!
73
109
  fuubar (~> 2.0)
@@ -78,4 +114,4 @@ DEPENDENCIES
78
114
  rspec (~> 3.0)
79
115
 
80
116
  BUNDLED WITH
81
- 2.1.3
117
+ 2.1.4
data/README.md CHANGED
@@ -54,5 +54,18 @@ end
54
54
  ```
55
55
  use Rack::CrawlerDetect
56
56
  ```
57
+ ## Configuration
58
+ In some cases you may want to use your own white-list, black-list, or list of HTTP headers to detect the User-Agent.
59
+
60
+ This is possible via `CrawlerDetect::Config`. For example, you may have an initializer like this:
61
+ ```
62
+ CrawlerDetect.setup! do |config|
63
+ config.raw_headers_path = File.expand_path("crawlers/MyHeaders.json", __dir__)
64
+ config.raw_crawlers_path = File.expand_path("crawlers/MyCrawlers.json", __dir__)
65
+ config.raw_exclusions_path = File.expand_path("crawlers/MyExclusions.json", __dir__)
66
+ end
67
+ ```
68
+ Make sure that your files are valid JSON files.
69
+ For more information, look at [the raw files](https://github.com/loadkpi/crawler_detect/tree/master/lib/crawler_detect/library/raw) that are used by default.
57
70
  ## License
58
71
  MIT License
@@ -0,0 +1,10 @@
1
+ #!/bin/bash
2
+
3
+ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
4
+
5
+ wget -O $DIR/../lib/crawler_detect/library/raw/Crawlers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Crawlers.json
6
+ wget -O $DIR/../lib/crawler_detect/library/raw/Exclusions.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Exclusions.json
7
+ wget -O $DIR/../lib/crawler_detect/library/raw/Headers.json https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/raw/Headers.json
8
+
9
+ wget -O $DIR/../spec/fixtures/crawlers.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/crawlers.txt
10
+ wget -O $DIR/../spec/fixtures/devices.txt https://raw.githubusercontent.com/JayBizzle/Crawler-Detect/master/tests/devices.txt
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- lib = File.expand_path("../lib", __FILE__)
3
+ lib = File.expand_path("lib", __dir__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
  require "crawler_detect/version"
6
6
 
@@ -17,13 +17,16 @@ Gem::Specification.new do |spec|
17
17
 
18
18
  # Specify which files should be added to the gem when it is released.
19
19
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
20
- spec.files = Dir.chdir(File.expand_path("..", __FILE__)) do
20
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
21
21
  `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
22
22
  end
23
23
  spec.bindir = "exe"
24
24
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
25
25
  spec.require_paths = ["lib"]
26
26
 
27
+ spec.add_dependency "oj", ">= 3.0"
28
+ spec.add_dependency "qonfig", "~> 0.24"
29
+
27
30
  spec.add_development_dependency "activesupport", "~> 5.2.0"
28
31
  spec.add_development_dependency "bundler", ">= 1.15"
29
32
  spec.add_development_dependency "fuubar", "~> 2.0"
@@ -32,4 +35,5 @@ Gem::Specification.new do |spec|
32
35
  spec.add_development_dependency "rack-test", "~> 1.1"
33
36
  spec.add_development_dependency "rake", ">= 10.0"
34
37
  spec.add_development_dependency "rspec", "~> 3.0"
38
+ spec.add_development_dependency "armitage-rubocop", "0.82"
35
39
  end
@@ -1,24 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "crawler_detect/detector"
4
- require "crawler_detect/library"
5
- require "crawler_detect/library/crawlers"
6
- require "crawler_detect/library/exclusions"
7
- require "crawler_detect/library/headers"
8
- require "crawler_detect/version"
3
+ require "oj"
4
+ require "qonfig"
9
5
 
10
- require "rack/crawler_detect"
6
+ require_relative "crawler_detect/config"
7
+ require_relative "crawler_detect/detector"
8
+ require_relative "crawler_detect/library/loader"
9
+ require_relative "crawler_detect/library/crawlers"
10
+ require_relative "crawler_detect/library/exclusions"
11
+ require_relative "crawler_detect/library/headers"
12
+ require_relative "crawler_detect/library"
13
+ require_relative "crawler_detect/version"
14
+ require_relative "rack/crawler_detect"
11
15
 
16
+ # @since 0.1.0
12
17
  module CrawlerDetect
13
18
  class << self
19
+ # @param user_agent [String] User-agent string to detect
20
+ # @return [CrawlerDetect::Detector] Instance of detector class
14
21
  def new(user_agent)
15
22
  detector(user_agent)
16
23
  end
17
24
 
25
+ # @param user_agent [String] User-agent string to detect
26
+ # @return [true, false] Is User-agent a crawler?
18
27
  def is_crawler?(user_agent)
19
28
  detector(user_agent).is_crawler?
20
29
  end
21
30
 
31
+ # @since 1.0.0
32
+ # @param config [Proc]
33
+ def setup!(&config)
34
+ @config = CrawlerDetect::Config.new(&config)
35
+ Library::DATA_CLASSES.each(&:reload_data)
36
+ end
37
+
38
+ # @since 1.0.0
39
+ # @return [CrawlerDetect::Config] Instance of configuration class
40
+ def config
41
+ @config ||= CrawlerDetect::Config.new
42
+ end
43
+
22
44
  private
23
45
 
24
46
  def detector(user_agent)