human_power 0.0.6 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 6d42507a07817c999567723432bcfc334acf0915
-   data.tar.gz: 16944ca90ecea9a3aa797787d03f321ed8cf49ef
+   metadata.gz: e0c1c83f98908134117b2b4aab263d7624fbdd8c
+   data.tar.gz: 622632e2dd563dacc9c1da16c45bc7c39063f7d2
  SHA512:
-   metadata.gz: 6687a32753b3261d45b899f02f14c3f7ba9a5195467f05281ec9e4acbdc43dde51bfe375f562bfac62800a7e8517a89f29b7c9d0643cdf58d7ebb6d6a98bb345
-   data.tar.gz: 836e6e7500a71d531eaf93c255f22074537b9a442752b51e4742b7987862ab45e958c9061935e8c1c80a52e5dc1cfdd01dfa2a06891f537a1bf466f3044412b1
+   metadata.gz: 17b7c4f6212ec8cd7c7b163cc4852490ae979a1c9c85f0186bf4fedcfba7b8846dfc442245c1bf89568101638a7e93c0d4d9921dc02b3544a25684842bfabec4
+   data.tar.gz: f0cec68d61d6e3fb520816e3add83874c6a61ed1c289ef3fefe342a60597b1e2d5c4437b7ac51aaf750330c2ef6ea6442f91c6c574804ca7e5fb6765c9d33b3c
data/README.md CHANGED
@@ -77,6 +77,31 @@ Then visit `/robots.txt` in your browser.
  Please see [user_agents.yml](https://github.com/lassebunk/human_power/blob/master/user_agents.yml) for a list of 170+ built-in user agents/crawlers you can use like shown above.
  The list is from [UserAgentString.com](http://www.useragentstring.com/pages/Crawlerlist/).
 
+ ### Bot detection
+
+ You can use the `HumanPower.is_bot?` method to check if a user agent is a known bot / crawler:
+
+ ```ruby
+ # Googlebot
+ ua = "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)"
+ HumanPower.is_bot?(ua) # => true
+
+ # Chrome
+ ua = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36"
+ HumanPower.is_bot?(ua) # => false
+
+ # in Rails
+ HumanPower.is_bot?(request.user_agent) # => performs check on current user agent
+ ```
+
+ ### Regular expression
+
+ If you need to get a regular expression for bot detection, you can use:
+
+ ```ruby
+ HumanPower.bot_regex # => regular expression that matches all known bots / crawlers
+ ```
+
  ## Caveats
 
  Human Power is great for adding rules to your robots.txt.
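
To make the new README section concrete, here is a brief sketch of the two methods in use. The user-agent strings are borrowed from the new test file below, and the `# =>` values follow from its assertions:

```ruby
require "human_power"

# Known crawler UA (from the test file below) => detected as a bot.
HumanPower.is_bot?("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)") # => true

# The regex can also be used directly, as the README section describes:
"Googlebot/2.1 (+http://www.googlebot.com/bot.html)" =~ HumanPower.bot_regex # => match offset (truthy); nil for a plain browser UA
```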
data/lib/human_power.rb CHANGED
@@ -24,6 +24,19 @@ module HumanPower
    @user_agents ||= load_user_agents
  end
 
+ # Regular expression to match bot user agents.
+ def bot_regex
+   @bot_regex ||= begin
+     escaped_values = user_agents.values.map { |ua| Regexp.escape(ua) }
+     /#{escaped_values.join("|")}/i
+   end
+ end
+
+ # Returns +true+ if a given user agent is a bot.
+ def is_bot?(user_agent)
+   !!(user_agent =~ bot_regex)
+ end
+
  private
 
  # Loads the built-in user agents from crawlers.yml.
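
The new `bot_regex` memoizes one case-insensitive alternation over the escaped user-agent tokens, and `is_bot?` is just a boolean wrapper around it. A minimal standalone sketch of that construction, using a hypothetical two-entry token hash in place of the 170+ entries loaded from the bundled YAML:

```ruby
# Hypothetical stand-in for the gem's user_agents hash (really loaded from YAML).
user_agents = { googlebot: "Googlebot", bingbot: "bingbot" }

# Escape each token so regex metacharacters match literally, then join them
# into a single case-insensitive alternation, as bot_regex does above.
bot_regex = /#{user_agents.values.map { |ua| Regexp.escape(ua) }.join("|")}/i
# => /Googlebot|bingbot/i

!!("Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)" =~ bot_regex) # => true
!!("Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0" =~ bot_regex) # => false
```

Memoizing into `@bot_regex` means the alternation is built once per process, which matters with 170+ tokens.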
data/lib/human_power/version.rb CHANGED
@@ -1,3 +1,3 @@
  module HumanPower
-   VERSION = "0.0.6"
+   VERSION = "0.1.0"
  end
data/test/bot_detection_test.rb ADDED
@@ -0,0 +1,43 @@
+ require 'test_helper'
+
+ class GeneratorTest < ActionView::TestCase
+   test "bot regex matches" do
+     bot_user_agents.each do |ua|
+       assert_match HumanPower.bot_regex, ua
+     end
+   end
+
+   test "bot regex browser matches" do
+     browser_user_agents.each do |ua|
+       assert_no_match HumanPower.bot_regex, ua
+     end
+   end
+
+   test "bot detection" do
+     bot_user_agents.each do |ua|
+       assert HumanPower.is_bot?(ua)
+     end
+   end
+
+   test "browser detection" do
+     browser_user_agents.each do |ua|
+       assert !HumanPower.is_bot?(ua)
+     end
+   end
+
+   private
+
+   def bot_user_agents
+     ["Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+      "Googlebot/2.1 (+http://www.googlebot.com/bot.html)",
+      "Mozilla/4.0 (compatible; B-l-i-t-z-B-O-T)",
+      "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"]
+   end
+
+   def browser_user_agents
+     ["Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0",
+      "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36",
+      "Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25",
+      "Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0"]
+   end
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: human_power
  version: !ruby/object:Gem::Version
-   version: 0.0.6
+   version: 0.1.0
  platform: ruby
  authors:
  - Lasse Bunk
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2014-02-14 00:00:00.000000000 Z
+ date: 2014-05-12 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: bundler
@@ -91,6 +91,7 @@ files:
  - lib/human_power/rails/engine.rb
  - lib/human_power/rule.rb
  - lib/human_power/version.rb
+ - test/bot_detection_test.rb
  - test/dummy/README.rdoc
  - test/dummy/Rakefile
  - test/dummy/app/assets/images/.keep
@@ -159,6 +160,7 @@ signing_key:
  specification_version: 4
  summary: Easy generation of robots.txt. Force the robots into submission!
  test_files:
+ - test/bot_detection_test.rb
  - test/dummy/README.rdoc
  - test/dummy/Rakefile
  - test/dummy/app/assets/images/.keep