bot_detection 1.0.6 → 1.0.7

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c246237ec3d560688b788f3484a98e1f0be542e7
4
- data.tar.gz: 2602e39c633e6288bc8c8520d129d18dbe3ce589
3
+ metadata.gz: 3f9cf46bdce8f49486f6a1bc88f966f2068920aa
4
+ data.tar.gz: 7ec14e78a5cf23c8534642f945186b95ccb8f042
5
5
  SHA512:
6
- metadata.gz: 44b76dff0c36b98cca4d73f3797ebe1397c1e3b164db6b8b764039296ee627fc95807555262e107cbfab04fa8465d660c9d0f84c667547e5463588ae674a9a2b
7
- data.tar.gz: 6c4cedabfe59efd419e517f8d81be89d5f4e011182b5c84e5d66df7d61f24cd41c81b92f9c609ef649e0b1c99995bafecd719d7d79eacc958dca3c5973a180d0
6
+ metadata.gz: e455e84029af1e41be86f87b381f028ee36443dcb7cb202cceaff4be8516bcdcd50029676d10af21513e4e151f6fdd60f8d19263742567f773cc39f224b72b17
7
+ data.tar.gz: 964f6ef6ce7fe47fe5b97cdc474462557056c394e79041f6983056ab9589a8be3160242694b0d602313b33c8ed18cf1dbf3e520c1d4d5e4d8bb6f3117823433c
data/.gitignore CHANGED
@@ -20,3 +20,4 @@ tmp
20
20
  *.o
21
21
  *.a
22
22
  mkmf.log
23
+ .rvmrc
data/.travis.yml ADDED
@@ -0,0 +1,11 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.0.0
5
+ - 2.1
6
+ - 2.2
7
+
8
+ notifications:
9
+ email:
10
+ on_success: change
11
+ on_failure: always
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  # Changelog
2
2
 
3
+ #### Release 1.0.7
4
+
5
+ - added several new user agents, which have been spotted around, to the list
6
+ - added rspec to validate builds on travis
7
+
3
8
  #### Release 1.0.6
4
9
 
5
10
  - added several new user agents, which have been spotted around, to the list
data/Rakefile CHANGED
@@ -1,2 +1,4 @@
1
1
  require "bundler/gem_tasks"
2
-
2
+ require 'rspec/core/rake_task'
3
+ RSpec::Core::RakeTask.new(:spec)
4
+ task :default => :spec
@@ -5,6 +5,7 @@ require 'bot_detection/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "bot_detection"
8
+ spec.description = "Checks a user agent for a web crawler"
8
9
  spec.version = BotDetection::VERSION
9
10
  spec.authors = ["Nils Berenbold"]
10
11
  spec.email = ["nils.berenbold@gmail.com"]
@@ -16,8 +17,9 @@ Gem::Specification.new do |spec|
16
17
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
17
18
  spec.require_paths = ["lib"]
18
19
 
19
- spec.add_runtime_dependency "public_suffix", "~> 1.4.6"
20
+ spec.add_runtime_dependency "public_suffix", "~> 1.4"
20
21
 
22
+ spec.add_development_dependency "rspec", "~> 3.1"
21
23
  spec.add_development_dependency "bundler", "~> 1.6"
22
- spec.add_development_dependency "rake"
24
+ spec.add_development_dependency "rake", "~> 10.4"
23
25
  end
@@ -12,6 +12,7 @@ BotDetection::BotUserAgents = [
12
12
  "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.0; trendictionbot0.5.0; trendiction search; http://www.trendiction.de/bot; please let us know of any problems; web at trendiction.com) Gecko/20071127 Firefox/3.0.0.11",
13
13
  "ia_archiver (+http://www.alexa.com/site/help/webmasters; crawler@alexa.com)",
14
14
  "Mozilla/5.0 (compatible; Exabot/3.0 (BiggerBetter); +http://www.exabot.com/go/robot)",
15
+ "Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)",
15
16
  "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)",
16
17
  "Mozilla/5.0 (compatible; SMTBot/1.0; +http://www.similartech.com/smtbot)",
17
18
  "Twitterbot/1.0",
@@ -41,8 +42,48 @@ BotDetection::BotUserAgents = [
41
42
  "bitlybot",
42
43
  "Mozilla/5.0 (TweetmemeBot/4.0; +http://datasift.com/bot.html) Gecko/20100101 Firefox/31.0",
43
44
  "Mozilla/5.0 (Linux; U; Android 4.2.2; bg-bg; CUBOT X6 Build/JDQ39) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
45
+ "Mozilla/5.0 (Linux; U; Android 4.2.2; de-de; CUBOT S108 Build/CUBOT_S108) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
44
46
  "DoCoMo/2.0 N905i(c100;TB;W24H16) (compatible; Googlebot-Mobile/2.1; +http://www.google.com/bot.html)",
45
47
  "AppEngine-Google; (+http://code.google.com/appengine; appid: asburyisgay)",
46
48
  "Mozilla/5.0 (compatible; SemrushBot/0.98~bl; +http://www.semrush.com/bot.html)",
47
49
  "Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)",
50
+ "Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)",
51
+ "MaxPointCrawler/Nutch-1.1 (maxpoint.crawler at maxpointinteractive dot com)",
52
+ "woobot/2.0",
53
+ "techmixx Spider v1 (http://techmixx.com/)",
54
+ "WordPress/4.0.1; http://novarabota.org",
55
+ "yacybot (/global; amd64 Windows 7 6.1; java 1.8.0_25; Europe/en) http://yacy.net/bot.html",
56
+ "Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/Fast/2.0; +http://go.mail.ru/help/robots)",
57
+ "QuerySeekerSpider ( http://queryseeker.com/bot.html )",
58
+ "Mozilla/5.0 (compatible; LinkpadBot/1.06; +http://www.linkpad.ru)",
59
+ "Mozilla/5.0 (compatible; MixrankBot; crawler@mixrank.com)",
60
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-wherecat@moz.com)",
61
+ "Mozilla/5.0 (compatible; OpenHoseBot/2.1; +http://www.openhose.org/bot.html)",
62
+ "Superarama.com - BOT/v.0.1",
63
+ "Mozilla/5.0 (compatible; memoryBot/1.21.14 +http://mignify.com/bot.html)",
64
+ "Elmer, the Thinglink ImageBot (http://www.thinglink.com/help/ThinglinkImageBot)",
65
+ "Mozilla/5.0 (compatible; SEOkicks-Robot; +http://www.seokicks.de/robot.html)",
66
+ "Mozilla/5.0 (compatible; MJ12bot/v1.4.4; http://www.majestic12.co.uk/bot.php?+)",
67
+ "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; CrystalSemanticsBot http://www.crystalsemantics.com/service-navigation/imprint/useragent/)",
68
+ "Mozilla/5.0 (compatible;acapbot/0.1;treat like Googlebot)",
69
+ "Mozilla/5.0 (compatible; archive.org_bot +http://archive.org/details/archive.org_bot)",
70
+ "Slackbot-LinkExpanding 1.0 (+https://api.slack.com/robots)",
71
+ "Wotbox/2.01 (+http://www.wotbox.com/bot/)",
72
+ "Wotbox/2.0 (bot@wotbox.com; http://www.wotbox.com)",
73
+ "Mozilla/5.0 (compatible; EveryoneSocialBot/1.0; support@everyonesocial.com http://everyonesocial.com/)",
74
+ "Jabse.com/2.0 (+http://www.jabse.com/bot.php)",
75
+ "Cliqzbot/0.1 (+http://cliqz.com/company/cliqzbot)",
76
+ "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 (.NET CLR 3.5.30729; Diffbot/0.1; +http://www.diffbot.com)",
77
+ "CCBot/2.0 (http://commoncrawl.org/faq/)",
78
+ "Mozilla/5.0 (compatible; Kraken/0.1; http://linkfluence.net/; bot@linkfluence.net)",
79
+ "Mozilla/5.0 (compatible; uMBot-FC/1.0; mailto: crawling@ubermetrics-technologies.com)",
80
+ "Mozilla/5.0 (compatible; memoryBot/1.20.311 +http://internetmemory.org/en/)",
81
+ "Mozilla/5.0 (compatible; uMBot-LN/1.0; mailto: crawling@ubermetrics-technologies.com)",
82
+ "Mozilla/5.0 (compatible; linkdexbot/2.0; +http://www.linkdex.com/bots/)",
83
+ "German Wikipedia Broken Weblinks Bot; contact: gifti@tools.wmflabs.org",
84
+ "Mozilla/5.0 (compatible; SeznamBot/3.2; +http://fulltext.sblog.cz/)",
85
+ "Mozilla/5.0 (compatible; TwitterCrawler)",
86
+ "rogerbot/1.0 (http://moz.com/help/pro/what-is-rogerbot-, rogerbot-crawler+shiny@moz.com)",
87
+ "crawler4j (http://code.google.com/p/crawler4j/)",
88
+ "Mozilla/5.0 (compatible; PaperLiBot/2.1; http://support.paper.li/entries/20023257-what-is-paper-li)",
48
89
  ]
@@ -3,6 +3,7 @@ BotDetection::SearchEngineUserAgents = [
3
3
  "Baiduspider+(+http://www.baidu.com/search/spider.htm)",
4
4
  "Baiduspider+(+http://www.baidu.com/search/spider_jp.html)",
5
5
  "BaiDuSpider",
6
+ "Mozilla/5.0+(compatible;+Baiduspider-cpro;++http://www.baidu.com/search/spider.html)",
6
7
  "DuckDuckBot/1.0; (+http://duckduckgo.com/duckduckbot.html)",
7
8
  "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)",
8
9
  "Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)",
@@ -28,10 +29,12 @@ BotDetection::SearchEngineUserAgents = [
28
29
  "Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)",
29
30
  "msnbot/2.1",
30
31
  "msnbot/2.0b",
32
+ "msnbot/2.0b (+http://search.msn.com/msnbot.htm)",
31
33
  "msnbot/1.1 (+http://search.msn.com/msnbot.htm)",
32
34
  "msnbot/1.1",
33
35
  "msnbot/1.0 (+http://search.msn.com/msnbot.htm)",
34
36
  "msnbot/0.9 (+http://search.msn.com/msnbot.htm)",
35
37
  "msnbot/0.11 ( http://search.msn.com/msnbot.htm)",
36
38
  "MSNBOT/0.1 (http://search.msn.com/msnbot.htm)",
39
+ "Mozilla/5.0 (compatible; Yeti/1.1; +http://help.naver.com/robots/)",
37
40
  ]
@@ -1,3 +1,3 @@
1
1
  module BotDetection
2
- VERSION = "1.0.6"
2
+ VERSION = "1.0.7"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bot_detection
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nils Berenbold
@@ -14,52 +14,67 @@ dependencies:
14
14
  name: public_suffix
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ~>
17
+ - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 1.4.6
19
+ version: '1.4'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ~>
24
+ - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 1.4.6
26
+ version: '1.4'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '3.1'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.1'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
- - - ~>
45
+ - - "~>"
32
46
  - !ruby/object:Gem::Version
33
47
  version: '1.6'
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
- - - ~>
52
+ - - "~>"
39
53
  - !ruby/object:Gem::Version
40
54
  version: '1.6'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - '>='
59
+ - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '0'
61
+ version: '10.4'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - '>='
66
+ - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '0'
55
- description:
68
+ version: '10.4'
69
+ description: Checks a user agent for a web crawler
56
70
  email:
57
71
  - nils.berenbold@gmail.com
58
72
  executables: []
59
73
  extensions: []
60
74
  extra_rdoc_files: []
61
75
  files:
62
- - .gitignore
76
+ - ".gitignore"
77
+ - ".travis.yml"
63
78
  - CHANGELOG.md
64
79
  - Gemfile
65
80
  - LICENSE.txt
@@ -82,17 +97,17 @@ require_paths:
82
97
  - lib
83
98
  required_ruby_version: !ruby/object:Gem::Requirement
84
99
  requirements:
85
- - - '>='
100
+ - - ">="
86
101
  - !ruby/object:Gem::Version
87
102
  version: '0'
88
103
  required_rubygems_version: !ruby/object:Gem::Requirement
89
104
  requirements:
90
- - - '>='
105
+ - - ">="
91
106
  - !ruby/object:Gem::Version
92
107
  version: '0'
93
108
  requirements: []
94
109
  rubyforge_project:
95
- rubygems_version: 2.0.14
110
+ rubygems_version: 2.4.5
96
111
  signing_key:
97
112
  specification_version: 4
98
113
  summary: Detects Search Engine crawlers by reverse DNS lookups.