is_crawler 0.1.1 → 0.1.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/README.md +3 -1
- data/lib/config/crawlers.yml +34 -0
- data/lib/crawler.rb +5 -19
- data/lib/is_crawler/version.rb +1 -1
- data/spec/is_crawler_spec.rb +3 -3
- data/spec/lib/crawler_spec.rb +2 -1
- metadata +4 -3
data/README.md
CHANGED
@@ -32,7 +32,9 @@ You can use the `is_crawler?` method with just a user agent string to determine
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
...or provide one or more crawlers to find out if the string matches *specific* crawlers:
|
35
|
+
...or provide one or more crawlers to find out if the string matches *specific* crawlers:
|
36
|
+
|
37
|
+
is_crawler?("Some User Agent/1.0", :facebook, :google)
|
36
38
|
|
37
39
|
You can also define custom crawlers like this:
|
38
40
|
|
data/lib/config/crawlers.yml
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
addthis:
|
2
|
+
ua_string: AddThis.com
|
3
|
+
alexa:
|
4
|
+
ua_string: ia_archiver
|
5
|
+
archive_org:
|
6
|
+
ua_string: archive.org_bot
|
7
|
+
bing:
|
8
|
+
ua_string: bingbot
|
9
|
+
bitly:
|
10
|
+
ua_string: bitlybot
|
11
|
+
exabot:
|
12
|
+
ua_string: Exabot
|
13
|
+
facebook:
|
14
|
+
ua_string: facebookexternalhit
|
15
|
+
flipboard:
|
16
|
+
ua_string: FlipboardProxy
|
17
|
+
google:
|
18
|
+
ua_string: Googlebot
|
19
|
+
google_web_preview:
|
20
|
+
ua_string: Google Web Preview
|
21
|
+
msn:
|
22
|
+
ua_string: MSNBot
|
23
|
+
openwebspider:
|
24
|
+
ua_string: OpenWebSpider
|
25
|
+
technorati:
|
26
|
+
ua_string: Technoratibot
|
27
|
+
twitter:
|
28
|
+
ua_string: Twitterbot
|
29
|
+
yahoo:
|
30
|
+
ua_string: Yahoo! Slurp
|
31
|
+
yahoo_jp:
|
32
|
+
ua_string: Y!J
|
33
|
+
yandex:
|
34
|
+
ua_string: Yandex
|
data/lib/crawler.rb
CHANGED
@@ -1,23 +1,9 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
class Crawler < Struct.new(:name, :ua_string)
|
2
|
-
DEFAULT = [
|
3
|
-
|
4
|
-
|
5
|
-
Crawler.new(:archive_org, "archive.org_bot"),
|
6
|
-
Crawler.new(:bing, "bingbot"),
|
7
|
-
Crawler.new(:bitly, "bitlybot"),
|
8
|
-
Crawler.new(:exabot, "Exabot"),
|
9
|
-
Crawler.new(:facebook, "facebookexternalhit"),
|
10
|
-
Crawler.new(:flipboard, "FlipboardProxy"),
|
11
|
-
Crawler.new(:google, "Googlebot"),
|
12
|
-
Crawler.new(:google_web_preview, "Google Web Preview"),
|
13
|
-
Crawler.new(:msn, "MSNBot"),
|
14
|
-
Crawler.new(:openwebspider, "OpenWebSpider"),
|
15
|
-
Crawler.new(:technorati, "Technoratibot"),
|
16
|
-
Crawler.new(:twitter, "Twitterbot"),
|
17
|
-
Crawler.new(:yahoo, "Yahoo! Slurp"),
|
18
|
-
Crawler.new(:yahoo_jp, "Y!J"),
|
19
|
-
Crawler.new(:yandex, "Yandex")
|
20
|
-
].freeze
|
4
|
+
DEFAULT = YAML.load(File.read(File.expand_path('../config/crawlers.yml', __FILE__))).collect do |k,v|
|
5
|
+
Crawler.new(k.to_sym, v["ua_string"])
|
6
|
+
end
|
21
7
|
|
22
8
|
CUSTOM = []
|
23
9
|
|
data/lib/is_crawler/version.rb
CHANGED
data/spec/is_crawler_spec.rb
CHANGED
@@ -4,7 +4,7 @@ describe IsCrawler do
|
|
4
4
|
describe '#is_crawler?' do
|
5
5
|
context 'When specific crawlers are provided' do
|
6
6
|
subject { Test.new.is_crawler?(user_agent, :facebook, :google) }
|
7
|
-
context '
|
7
|
+
context 'and the provided string matches a crawler' do
|
8
8
|
context 'and it is in the specified list' do
|
9
9
|
context 'as the first element' do
|
10
10
|
let(:user_agent) { "facebookexternalhit/1.1" }
|
@@ -17,13 +17,13 @@ describe IsCrawler do
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
context '
|
20
|
+
context 'but it is not in the specified list' do
|
21
21
|
let(:user_agent) { "Twitterbot/1.1" }
|
22
22
|
it { should be_false }
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
context '
|
26
|
+
context 'but the provided string matches no crawlers' do
|
27
27
|
it { should be_false }
|
28
28
|
end
|
29
29
|
end
|
data/spec/lib/crawler_spec.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
describe Crawler do
|
2
2
|
let(:user_agent) { "Commodo Vestibulum/1.0" }
|
3
|
+
|
3
4
|
describe '.matches_any?' do
|
4
5
|
subject { Crawler.matches_any?(user_agent) }
|
5
6
|
|
@@ -37,7 +38,7 @@ describe Crawler do
|
|
37
38
|
subject { Crawler.which_crawler(user_agent) }
|
38
39
|
context 'When the provided string matches a crawler' do
|
39
40
|
let(:user_agent) { "facebookexternalhit/1.1" }
|
40
|
-
it { should
|
41
|
+
it { should be :facebook }
|
41
42
|
end
|
42
43
|
|
43
44
|
context 'When the provided string matches no crawlers' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: is_crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -57,6 +57,7 @@ files:
|
|
57
57
|
- README.md
|
58
58
|
- Rakefile
|
59
59
|
- is_crawler.gemspec
|
60
|
+
- lib/config/crawlers.yml
|
60
61
|
- lib/crawler.rb
|
61
62
|
- lib/is_crawler.rb
|
62
63
|
- lib/is_crawler/version.rb
|
@@ -76,7 +77,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
76
77
|
version: '0'
|
77
78
|
segments:
|
78
79
|
- 0
|
79
|
-
hash:
|
80
|
+
hash: -3983243957140806942
|
80
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
82
|
none: false
|
82
83
|
requirements:
|
@@ -85,7 +86,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
85
86
|
version: '0'
|
86
87
|
segments:
|
87
88
|
- 0
|
88
|
-
hash:
|
89
|
+
hash: -3983243957140806942
|
89
90
|
requirements: []
|
90
91
|
rubyforge_project:
|
91
92
|
rubygems_version: 1.8.24
|