human_power 0.0.6 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6d42507a07817c999567723432bcfc334acf0915
4
- data.tar.gz: 16944ca90ecea9a3aa797787d03f321ed8cf49ef
3
+ metadata.gz: e0c1c83f98908134117b2b4aab263d7624fbdd8c
4
+ data.tar.gz: 622632e2dd563dacc9c1da16c45bc7c39063f7d2
5
5
  SHA512:
6
- metadata.gz: 6687a32753b3261d45b899f02f14c3f7ba9a5195467f05281ec9e4acbdc43dde51bfe375f562bfac62800a7e8517a89f29b7c9d0643cdf58d7ebb6d6a98bb345
7
- data.tar.gz: 836e6e7500a71d531eaf93c255f22074537b9a442752b51e4742b7987862ab45e958c9061935e8c1c80a52e5dc1cfdd01dfa2a06891f537a1bf466f3044412b1
6
+ metadata.gz: 17b7c4f6212ec8cd7c7b163cc4852490ae979a1c9c85f0186bf4fedcfba7b8846dfc442245c1bf89568101638a7e93c0d4d9921dc02b3544a25684842bfabec4
7
+ data.tar.gz: f0cec68d61d6e3fb520816e3add83874c6a61ed1c289ef3fefe342a60597b1e2d5c4437b7ac51aaf750330c2ef6ea6442f91c6c574804ca7e5fb6765c9d33b3c
data/README.md CHANGED
@@ -77,6 +77,31 @@ Then visit `/robots.txt` in your browser.
77
77
  Please see [user_agents.yml](https://github.com/lassebunk/human_power/blob/master/user_agents.yml) for a list of 170+ built-in user agents/crawlers you can use as shown above.
78
78
  The list is from [UserAgentString.com](http://www.useragentstring.com/pages/Crawlerlist/).
79
79
 
80
+ ### Bot detection
81
+
82
+ You can use the `HumanPower.is_bot?` method to check if a user agent is a known bot / crawler:
83
+
84
+ ```ruby
85
+ # Googlebot
86
+ ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
87
+ HumanPower.is_bot?(ua) # => true
88
+
89
+ # Chrome
90
+ ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36"
91
+ HumanPower.is_bot?(ua) # => false
92
+
93
+ # in Rails
94
+ HumanPower.is_bot?(request.user_agent) # => performs check on current user agent
95
+ ```
96
+
97
+ ### Regular expression
98
+
99
+ If you need to get a regular expression for bot detection, you can use:
100
+
101
+ ```ruby
102
+ HumanPower.bot_regex # => regular expression that matches all known bots / crawlers
103
+ ```
104
+
80
105
  ## Caveats
81
106
 
82
107
  Human Power is great for adding rules to your robots.txt.
@@ -24,6 +24,19 @@ module HumanPower
24
24
  @user_agents ||= load_user_agents
25
25
  end
26
26
 
27
+ # Regular expression to match bot user agents.
28
+ def bot_regex
29
+ @bot_regex ||= begin
30
+ escaped_values = user_agents.values.map { |ua| Regexp.escape(ua) }
31
+ /#{escaped_values.join("|")}/i
32
+ end
33
+ end
34
+
35
+ # Returns +true+ if a given user agent is a bot.
36
+ def is_bot?(user_agent)
37
+ !!(user_agent =~ bot_regex)
38
+ end
39
+
27
40
  private
28
41
 
29
42
  # Loads the built-in user agents from user_agents.yml.
@@ -1,3 +1,3 @@
1
1
  module HumanPower
2
- VERSION = "0.0.6"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -0,0 +1,43 @@
1
+ require 'test_helper'
2
+
3
+ class GeneratorTest < ActionView::TestCase
4
+ test "bot regex matches" do
5
+ bot_user_agents.each do |ua|
6
+ assert_match HumanPower.bot_regex, ua
7
+ end
8
+ end
9
+
10
+ test "bot regex browser matches" do
11
+ browser_user_agents.each do |ua|
12
+ assert_no_match HumanPower.bot_regex, ua
13
+ end
14
+ end
15
+
16
+ test "bot detection" do
17
+ bot_user_agents.each do |ua|
18
+ assert HumanPower.is_bot?(ua)
19
+ end
20
+ end
21
+
22
+ test "browser detection" do
23
+ browser_user_agents.each do |ua|
24
+ assert !HumanPower.is_bot?(ua)
25
+ end
26
+ end
27
+
28
+ private
29
+
30
+ def bot_user_agents
31
+ ["Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
32
+ "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
33
+ "Mozilla/4.0 (compatible; B-l-i-t-z-B-O-T)",
34
+ "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"]
35
+ end
36
+
37
+ def browser_user_agents
38
+ ["Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
39
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
40
+ "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25",
41
+ "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0"]
42
+ end
43
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: human_power
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lasse Bunk
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-14 00:00:00.000000000 Z
11
+ date: 2014-05-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -91,6 +91,7 @@ files:
91
91
  - lib/human_power/rails/engine.rb
92
92
  - lib/human_power/rule.rb
93
93
  - lib/human_power/version.rb
94
+ - test/bot_detection_test.rb
94
95
  - test/dummy/README.rdoc
95
96
  - test/dummy/Rakefile
96
97
  - test/dummy/app/assets/images/.keep
@@ -159,6 +160,7 @@ signing_key:
159
160
  specification_version: 4
160
161
  summary: Easy generation of robots.txt. Force the robots into submission!
161
162
  test_files:
163
+ - test/bot_detection_test.rb
162
164
  - test/dummy/README.rdoc
163
165
  - test/dummy/Rakefile
164
166
  - test/dummy/app/assets/images/.keep