voight_kampff 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/Gemfile +1 -1
- data/README.md +3 -1
- data/config/crawler-user-agents.json +137 -5
- data/lib/voight_kampff/version.rb +1 -1
- data/lib/voight_kampff.rb +1 -1
- data/voight_kampff.gemspec +2 -2
- metadata +6 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b9028e3e1f6c4036ff821a6b118a163559adcf4
|
4
|
+
data.tar.gz: ecf384b5da997f04e49a26c9a696e88d23962607
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5a8f3d00a08969bb6cdf83a358faf4c8a66a7d8f3a0c609eeb1bfebe2d79a6fdb1404111cef18e26e145ea2243f773a7196e0253d8e2ae212b1f77718224533
|
7
|
+
data.tar.gz: b4c52b7f4fabbe2f69a71392afa0b51e6cf57e518b83c90686be03c5854664e65ae111a9a02ffaf046bc08cd15fa345548816e40e7cbe1d64ecb2088b60fc8d8
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.1
|
1
|
+
2.3.1
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
Voight-Kampff
|
2
2
|
=============
|
3
|
-
[![Build Status](https://travis-ci.org/biola/Voight-Kampff.
|
3
|
+
[![Build Status](https://travis-ci.org/biola/Voight-Kampff.svg?branch=master)](https://travis-ci.org/biola/Voight-Kampff)
|
4
|
+
[![Code Climate](https://codeclimate.com/github/biola/Voight-Kampff/badges/gpa.svg)](https://codeclimate.com/github/biola/Voight-Kampff)
|
5
|
+
[![Gem Version](https://badge.fury.io/rb/voight_kampff.svg)](https://badge.fury.io/rb/voight_kampff)
|
4
6
|
|
5
7
|
Voight-Kampff relies on a [user agent](http://en.wikipedia.org/wiki/User_agent) list for its detection. It can easily tell you if a request is coming from a crawler, spider or bot. This can be especially helpful in analytics such as page hit tracking.
|
6
8
|
|
@@ -1,7 +1,6 @@
|
|
1
|
-
|
2
1
|
[
|
3
2
|
{
|
4
|
-
"pattern": "
|
3
|
+
"pattern": "Googlebot\\/",
|
5
4
|
"url": "http://www.google.com/bot.html"
|
6
5
|
},
|
7
6
|
{
|
@@ -10,6 +9,16 @@
|
|
10
9
|
{
|
11
10
|
"pattern": "Googlebot-Image"
|
12
11
|
},
|
12
|
+
{
|
13
|
+
"pattern": "Googlebot-News"
|
14
|
+
},
|
15
|
+
{
|
16
|
+
"pattern": "Googlebot-Video"
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"pattern": "AdsBot-Google",
|
20
|
+
"url": "https://support.google.com/webmasters/answer/1061943?hl=en"
|
21
|
+
},
|
13
22
|
{
|
14
23
|
"pattern": "Mediapartners-Google",
|
15
24
|
"url": "https://support.google.com/webmasters/answer/1061943?hl=en"
|
@@ -46,6 +55,12 @@
|
|
46
55
|
{
|
47
56
|
"pattern": "nutch"
|
48
57
|
},
|
58
|
+
{
|
59
|
+
"pattern": "Go-http-client",
|
60
|
+
"addition_date": "2016/03/26",
|
61
|
+
"url": "https://golang.org/pkg/net/http/",
|
62
|
+
"instances": ["Go-http-client/1.1"]
|
63
|
+
},
|
49
64
|
{
|
50
65
|
"pattern": "phpcrawl",
|
51
66
|
"addition_date": "2012/09/17",
|
@@ -76,9 +91,14 @@
|
|
76
91
|
{
|
77
92
|
"pattern": "seekbot"
|
78
93
|
},
|
94
|
+
{
|
95
|
+
"pattern": "gigabot",
|
96
|
+
"instances": ["Gigabot/1.0", "Gigabot/2.0 (http://www.gigablast.com/spider.html)", "Gigabot/2.0 (http://www.gigablast.com/spider.html)"],
|
97
|
+
"url": "https://github.com/gigablast/open-source-search-engine"
|
98
|
+
},
|
79
99
|
{
|
80
100
|
"pattern": "gigablast",
|
81
|
-
"instances": ["
|
101
|
+
"instances": ["GigablastOpenSource/1.0"],
|
82
102
|
"url": "https://github.com/gigablast/open-source-search-engine"
|
83
103
|
},
|
84
104
|
{
|
@@ -745,6 +765,118 @@
|
|
745
765
|
"pattern": "AddThis",
|
746
766
|
"url": "https://www.addthis.com",
|
747
767
|
"instances": ["AddThis.com robot tech.support@clearspring.com"],
|
748
|
-
|
749
|
-
}
|
768
|
+
"addition_date": "2015/06/02"
|
769
|
+
},
|
770
|
+
{
|
771
|
+
"pattern": "Screaming Frog SEO Spider",
|
772
|
+
"url": "http://www.screamingfrog.co.uk/seo-spider",
|
773
|
+
"instances": ["Screaming Frog SEO Spider/5.1"],
|
774
|
+
"addition_date": "2016/01/08"
|
775
|
+
},
|
776
|
+
{
|
777
|
+
"pattern": "MetaURI",
|
778
|
+
"url": "http://www.useragentstring.com/MetaURI_id_17683.php",
|
779
|
+
"instances": ["MetaURI API/2.0 +metauri.com"],
|
780
|
+
"addition_date": "2016/01/02"
|
781
|
+
},
|
782
|
+
{
|
783
|
+
"pattern": "Scrapy",
|
784
|
+
"url": "http://scrapy.org/",
|
785
|
+
"instances": ["Scrapy/1.0.3 (+http://scrapy.org)"],
|
786
|
+
"addition_date": "2016/01/02"
|
787
|
+
},
|
788
|
+
{
|
789
|
+
"pattern": "LivelapBot",
|
790
|
+
"url": "http://site.livelap.com/crawler",
|
791
|
+
"instances": ["LivelapBot/0.2 (http://site.livelap.com/crawler)"],
|
792
|
+
"addition_date": "2016/01/02"
|
793
|
+
},
|
794
|
+
{
|
795
|
+
"pattern": "OpenHoseBot",
|
796
|
+
"url": "http://www.openhose.org/bot.html",
|
797
|
+
"instances": ["Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)"],
|
798
|
+
"addition_date": "2016/01/02"
|
799
|
+
},
|
800
|
+
{
|
801
|
+
"pattern": "CapsuleChecker",
|
802
|
+
"url": "http://www.capsulink.com/about",
|
803
|
+
"instances": ["CapsuleChecker (http://www.capsulink.com/)"],
|
804
|
+
"addition_date": "2016/01/02"
|
805
|
+
},
|
806
|
+
{
|
807
|
+
"pattern": "collection@infegy.com",
|
808
|
+
"url": "http://infegy.com/",
|
809
|
+
"instances": ["Mozilla/5.0 (compatible) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.73 Safari/537.36 collection@infegy.com"],
|
810
|
+
"addition_date": "2016/01/03"
|
811
|
+
},
|
812
|
+
{
|
813
|
+
"pattern": "IstellaBot",
|
814
|
+
"url": "http://www.tiscali.it/",
|
815
|
+
"instances": ["Mozilla/5.0 (compatible; IstellaBot/1.23.15 +http://www.tiscali.it/)"],
|
816
|
+
"addition_date": "2016/01/09"
|
817
|
+
},
|
818
|
+
{
|
819
|
+
"pattern": "DeuSu\\/",
|
820
|
+
"addition_date": "2016/01/23",
|
821
|
+
"url": "https://deusu.de/robot.html"
|
822
|
+
},
|
823
|
+
{
|
824
|
+
"pattern": "betaBot",
|
825
|
+
"addition_date": "2016/01/23"
|
826
|
+
},
|
827
|
+
{
|
828
|
+
"pattern": "Cliqzbot\\/",
|
829
|
+
"addition_date": "2016/01/23",
|
830
|
+
"url": "http://cliqz.com/company/cliqzbot"
|
831
|
+
},
|
832
|
+
{
|
833
|
+
"pattern": "MojeekBot\\/",
|
834
|
+
"addition_date": "2016/01/23",
|
835
|
+
"url": "https://www.mojeek.com/bot.html"
|
836
|
+
},
|
837
|
+
{
|
838
|
+
"pattern": "netEstate NE Crawler",
|
839
|
+
"addition_date": "2016/01/23",
|
840
|
+
"url": "http://www.website-datenbank.de/"
|
841
|
+
},
|
842
|
+
{
|
843
|
+
"pattern": "SafeSearch microdata crawler",
|
844
|
+
"addition_date": "2016/01/23",
|
845
|
+
"url": "https://safesearch.avira.com"
|
846
|
+
},
|
847
|
+
{
|
848
|
+
"pattern": "Gluten Free Crawler\\/",
|
849
|
+
"addition_date": "2016/01/23",
|
850
|
+
"url": "http://glutenfreepleasure.com/"
|
851
|
+
},
|
852
|
+
{
|
853
|
+
"pattern": "Sonic",
|
854
|
+
"addition_date": "2016/02/08",
|
855
|
+
"url": "http://www.yama.info.waseda.ac.jp/~crawler/info.html"
|
856
|
+
},
|
857
|
+
{
|
858
|
+
"pattern": "Sysomos",
|
859
|
+
"addition_date": "2016/02/08",
|
860
|
+
"url": "http://www.sysomos.com"
|
861
|
+
},
|
862
|
+
{
|
863
|
+
"pattern": "Trove",
|
864
|
+
"addition_date": "2016/02/08",
|
865
|
+
"url": "http://www.trove.com"
|
866
|
+
},
|
867
|
+
{
|
868
|
+
"pattern": "deadlinkchecker",
|
869
|
+
"addition_date": "2016/02/08",
|
870
|
+
"url": "http://www.deadlinkchecker.com"
|
871
|
+
},
|
872
|
+
{
|
873
|
+
"pattern": "Slack-ImgProxy",
|
874
|
+
"addition_date": "2016/04/25",
|
875
|
+
"url": "https://api.slack.com/robots"
|
876
|
+
},
|
877
|
+
{
|
878
|
+
"pattern": "Embedly",
|
879
|
+
"addition_date": "2016/04/25",
|
880
|
+
"url": "http://support.embed.ly"
|
881
|
+
}
|
750
882
|
]
|
data/lib/voight_kampff.rb
CHANGED
data/voight_kampff.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |s|
|
|
10
10
|
s.licenses = ['MIT']
|
11
11
|
|
12
12
|
s.author = "Adam Crownoble"
|
13
|
-
s.email = "adam@
|
13
|
+
s.email = "adam@codenoble.com"
|
14
14
|
s.homepage = "https://github.com/biola/Voight-Kampff"
|
15
15
|
|
16
16
|
# so that rubygems does not use the actual object
|
@@ -21,7 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.test_files = `git ls-files -- {tests}/**/*`.split("\n")
|
22
22
|
s.require_path = 'lib'
|
23
23
|
|
24
|
-
s.add_dependency 'rack', '~> 1.
|
24
|
+
s.add_dependency 'rack', '~> 1.5'
|
25
25
|
|
26
26
|
s.add_development_dependency 'rails', '~> 4.2'
|
27
27
|
s.add_development_dependency 'rspec-rails', '~> 3.3'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: voight_kampff
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Adam Crownoble
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-05-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rack
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.5'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.5'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rails
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.5'
|
69
69
|
description: Voight-Kampff detects bots, spiders, crawlers and replicants
|
70
|
-
email: adam@
|
70
|
+
email: adam@codenoble.com
|
71
71
|
executables: []
|
72
72
|
extensions: []
|
73
73
|
extra_rdoc_files: []
|
@@ -117,9 +117,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
117
117
|
version: '0'
|
118
118
|
requirements: []
|
119
119
|
rubyforge_project:
|
120
|
-
rubygems_version: 2.
|
120
|
+
rubygems_version: 2.5.1
|
121
121
|
signing_key:
|
122
122
|
specification_version: 4
|
123
123
|
summary: Voight-Kampff bot detection
|
124
124
|
test_files: []
|
125
|
-
has_rdoc:
|