isbot 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +7 -4
- data/data/data.json +2 -19
- data/isbot.gemspec +3 -3
- data/tests/isbot_test.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 7b83b0e8115e87e8fb63b459e43ce3d9e8eea777
+  data.tar.gz: 783b1ed2273e59143392c320325f27d45c8e509c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: de5383079b16b6cdee5dfeacb4cc464a3946adc061a88a45f9f343326f1cda54b4aba2fb3b96be24d3df200e9e663c0fa3b2665d9d81b659ed1c5ce8cf8109fa
+  data.tar.gz: 4666f8c5f99776739091c4c61699d2d36b939e38f16a11e5160a9eece5e189f6413cd137c4d0fcba433ff504423d26083076a3092e3fd6d22a84e881dba16b7a
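These are the SHA-1 and SHA-512 digests of the two archives inside the published gem. A minimal sketch of checking a downloaded archive against the new SHA-1 (the local file path is an assumption; `Digest` is Ruby's standard library):

```` ruby
require 'digest'

# Compare a locally extracted data.tar.gz against the SHA-1 value
# published in checksums.yaml for isbot 0.1.4.
expected = '783b1ed2273e59143392c320325f27d45c8e509c'
actual   = Digest::SHA1.file('data.tar.gz').hexdigest
puts(actual == expected ? 'checksum OK' : 'checksum mismatch')
````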
data/README.md
CHANGED
@@ -25,13 +25,16 @@ is_bot user_agent do |match_bot|
 end
 ````

-Add a
+Add a spider User-Agent field:

 ```` ruby
-user_agent = 'Mozilla/5.0 (compatible; MyBot/1.0; +http://my.me/bot.html'
-IsBot::add_ua_field 'MyBot'

-user_agent.
+user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google (+https://developers.google.com/+/web/snippet/)'
+
+# Add Goole Keep User-Agent match field:
+IsBot::add_ua_field 'developers\.google\.com/\+/web/snippet/'
+
+assert_true user_agent.is_bot? # pass
 ````
 #### Attached:
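The updated README example replaces the placeholder `MyBot` snippet with a real-world one: register a regular-expression fragment via `IsBot::add_ua_field`, then test a User-Agent string through the `String#is_bot?` extension. A runnable sketch of that flow (assumes the gem is installed; outside Test::Unit, a plain conditional stands in for `assert_true`):

```` ruby
require 'isbot'

# User-Agent sent by Google's snippet fetcher (from the new README).
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
             '(KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 ' \
             'Google (+https://developers.google.com/+/web/snippet/)'

# Register the match field, with regex metacharacters escaped.
IsBot::add_ua_field 'developers\.google\.com/\+/web/snippet/'

puts 'bot detected' if user_agent.is_bot?
````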
data/data/data.json
CHANGED
@@ -170,9 +170,6 @@
 {
   "pattern": "MJ12bot"
 },
-{
-  "pattern": "dotbot"
-},
 {
   "pattern": "woriobot"
 },
@@ -544,11 +541,6 @@
   "addition_date": "2014/03/31",
   "url": "http://scan-interfax.ru"
 },
-{
-  "pattern": "Lipperhey SEO Service",
-  "addition_date": "2014/04/01",
-  "url": "http://www.lipperhey.com/"
-},
 {
   "pattern": "CC Metadata Scaper",
   "addition_date": "2014/04/01",
@@ -594,14 +586,10 @@
   "addition_date": "2014/09/12",
   "instances": ["SimpleCrawler/0.1" ]
 },
-{
-  "pattern": "Livelapbot",
-  "addition_date": "2014/09/12",
-  "instances": ["Livelapbot/0.1" ]
-},
 {
   "pattern": "Twitterbot",
   "addition_date": "2014/09/12",
+  "url": "https://dev.twitter.com/cards/getting-started",
   "instances": ["Twitterbot/0.1", "Twitterbot/1.0" ]
 },
 {
@@ -639,11 +627,6 @@
   "url": "https://developers.facebook.com/docs/sharing/best-practices#crawl",
   "addition_date": "2014/12/30"
 },
-{
-  "pattern": "Twitterbot",
-  "url": "https://dev.twitter.com/cards/getting-started",
-  "addition_date": "2014/12/30"
-},
 {
   "pattern": "OrangeBot",
   "instances": ["Mozilla/5.0 (compatible; OrangeBot/2.0; support.orangebot@orange.com"],
@@ -789,7 +772,7 @@
 {
   "pattern": "LivelapBot",
   "url": "http://site.livelap.com/crawler",
-  "instances": ["LivelapBot/0.2 (http://site.livelap.com/crawler)"],
+  "instances": ["Livelapbot/0.1", "LivelapBot/0.2 (http://site.livelap.com/crawler)"],
   "addition_date": "2016/01/02"
 },
 {
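Net effect of the data changes: the duplicate `Twitterbot` entry is merged into one (which gains the `url` field), the stray `Livelapbot/0.1` instance is folded into the `LivelapBot` entry, and the `dotbot` and `Lipperhey SEO Service` patterns are dropped. Each entry's `pattern` field is a regular-expression fragment; a minimal sketch of how such a file could drive detection (the union-regex approach and the file path are assumptions, not the gem's actual internals):

```` ruby
require 'json'

# Build one union regex from every "pattern" field in the data file,
# then test a User-Agent string against it.
entries = JSON.parse(File.read('data/data/data.json'))
bot_re  = Regexp.union(entries.map { |e| Regexp.new(e['pattern']) })

puts bot_re.match?('Twitterbot/1.0')  # => true
````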
data/isbot.gemspec
CHANGED
@@ -1,10 +1,10 @@
 Gem::Specification.new do |s|
   s.name = 'isbot'
-  s.version = '0.1.3'
+  s.version = '0.1.4'
   s.executables << 'isbot'
   s.date = '2017-05-21'
-  s.summary = 'detects bots/crawlers/spiders via the user agent
-  s.description = 'A simple library for detecting bots/crawlers/spiders through User-Agent strings
+  s.summary = 'detects bots/crawlers/spiders via the user agent'
+  s.description = 'A simple library for detecting bots/crawlers/spiders through User-Agent strings'
   s.authors = ['Hentioe']
   s.email = 'meow.i5.br@gmail.com'
   s.files = Dir['**/*']
data/tests/isbot_test.rb
CHANGED
@@ -21,8 +21,8 @@ class IsBotTest < Test::Unit::TestCase
   end

   def test_add_ua_field
-    user_agent = 'Mozilla/5.0 (
-    IsBot::add_ua_field '
+    user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 Google (+https://developers.google.com/+/web/snippet/)'
+    IsBot::add_ua_field 'developers\.google\.com/\+/web/snippet/'

     assert_true user_agent.is_bot?
   end
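The escaping in the new test (`\.` and `\+`) indicates that `add_ua_field` takes a regular-expression fragment rather than a literal substring. A small illustration in plain Ruby of why the escaping matters:

```` ruby
# Unescaped, '.' matches any character and '+' quantifies the preceding '/',
# so the pattern is looser than the URL it is meant to match:
loose = Regexp.new('developers.google.com/+/web/snippet/')
loose.match?('developersXgoogleXcom//web/snippet/')    # => true

# Escaped, only the literal URL fragment matches:
strict = Regexp.new('developers\.google\.com/\+/web/snippet/')
strict.match?('developers.google.com/+/web/snippet/')  # => true
strict.match?('developersXgoogleXcom//web/snippet/')   # => false
````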
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: isbot
 version: !ruby/object:Gem::Version
-  version: 0.1.3
+  version: 0.1.4
 platform: ruby
 authors:
 - Hentioe
@@ -11,7 +11,7 @@ cert_chain: []
 date: 2017-05-21 00:00:00.000000000 Z
 dependencies: []
 description: A simple library for detecting bots/crawlers/spiders through User-Agent
-  strings
+  strings
 email: meow.i5.br@gmail.com
 executables:
 - isbot
@@ -50,5 +50,5 @@ rubyforge_project:
 rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
-summary: detects bots/crawlers/spiders via the user agent
+summary: detects bots/crawlers/spiders via the user agent
 test_files: []