woothee 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +1 -1
- data/lib/woothee/appliance.rb +1 -1
- data/lib/woothee/browser.rb +3 -1
- data/lib/woothee/crawler.rb +30 -6
- data/lib/woothee/dataset.rb +17 -2
- data/lib/woothee/misc.rb +10 -6
- data/woothee.gemspec +1 -1
- metadata +9 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 13da5f2dcefa4793ff7a8868504db2965fe0dc86
|
4
|
+
data.tar.gz: a57fad8bf90c80c9a21d840a6304a1b75b762293
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a678ca5fbe5b9731c34b895c1831a8fdbf99ba370c73bd6e791b66da12b8fe8fa95b585cddc434f9ff3ba8112847be51400bc0f9221c18e2e7e1a09ba828539b
|
7
|
+
data.tar.gz: 7ef9e2d99119fb99861cefe8e1c7306ddeb625845db13459ee3c61d52fce9543d17546c6fc2e7023041ee58a71d7cb2f166d7b4307a095a0f87d4c0b26b5e406
|
data/Gemfile
CHANGED
data/lib/woothee/appliance.rb
CHANGED
@@ -10,7 +10,7 @@ module Woothee::Appliance
|
|
10
10
|
data = case
|
11
11
|
when ua.index('PSP (PlayStation Portable);') then Woothee::DataSet.get('PSP')
|
12
12
|
when ua.index('PlayStation Vita') then Woothee::DataSet.get('PSVita')
|
13
|
-
when ua.index('PLAYSTATION 3;') then Woothee::DataSet.get('PS3')
|
13
|
+
when ua.index('PLAYSTATION 3 ') || ua.index('PLAYSTATION 3;') then Woothee::DataSet.get('PS3')
|
14
14
|
else nil
|
15
15
|
end
|
16
16
|
return false unless data
|
data/lib/woothee/browser.rb
CHANGED
@@ -57,7 +57,9 @@ module Woothee::Browser
|
|
57
57
|
def self.challenge_opera(ua, result)
|
58
58
|
return false if ua.index('Opera').nil?
|
59
59
|
|
60
|
-
version = if ua =~ /
|
60
|
+
version = if ua =~ /Version\/([.0-9]+)/o
|
61
|
+
$1
|
62
|
+
elsif ua =~ /Opera[\/ ]([.0-9]+)/o
|
61
63
|
$1
|
62
64
|
else
|
63
65
|
Woothee::VALUE_UNKNOWN
|
data/lib/woothee/crawler.rb
CHANGED
@@ -45,8 +45,8 @@ module Woothee::Crawler
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def self.challenge_crawlers(ua, result)
|
48
|
-
if ua.index('Yahoo') or ua.index('listing.yahoo.co.jp/support/faq/')
|
49
|
-
if ua.index('compatible; Yahoo! Slurp
|
48
|
+
if ua.index('Yahoo') or ua.index('help.yahoo.co.jp/help/jp/') or ua.index('listing.yahoo.co.jp/support/faq/')
|
49
|
+
if ua.index('compatible; Yahoo! Slurp')
|
50
50
|
update_map(result, Woothee::DataSet.get('YahooSlurp'))
|
51
51
|
return true
|
52
52
|
end
|
@@ -54,7 +54,7 @@ module Woothee::Crawler
|
|
54
54
|
update_map(result, Woothee::DataSet.get('YahooJP'))
|
55
55
|
return true
|
56
56
|
end
|
57
|
-
if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/')
|
57
|
+
if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/') || ua.index('crawler (http://help.yahoo.co.jp/help/jp/')
|
58
58
|
update_map(result, Woothee::DataSet.get('YahooJP'))
|
59
59
|
return true
|
60
60
|
end
|
@@ -95,14 +95,38 @@ module Woothee::Crawler
|
|
95
95
|
end
|
96
96
|
if ua.index('ichiro')
|
97
97
|
if ua.index('http://help.goo.ne.jp/door/crawler.html') or ua.index('compatible; ichiro/mobile goo;')
|
98
|
-
update_map(result, Woothee::DataSet.get('
|
98
|
+
update_map(result, Woothee::DataSet.get('goo'))
|
99
99
|
return true
|
100
100
|
end
|
101
101
|
end
|
102
|
+
if ua.index('gooblogsearch/')
|
103
|
+
update_map(result, Woothee::DataSet.get('goo'))
|
104
|
+
return true
|
105
|
+
end
|
102
106
|
if ua.index('Apple-PubSub')
|
103
107
|
update_map(result, Woothee::DataSet.get('ApplePubSub'))
|
104
108
|
return true
|
105
109
|
end
|
110
|
+
if ua.index("(www.radian6.com/crawler)")
|
111
|
+
update_map(result, Woothee::DataSet.get("radian6"))
|
112
|
+
return true
|
113
|
+
end
|
114
|
+
if ua.index('Genieo/')
|
115
|
+
update_map(result, Woothee::DataSet.get("Genieo"))
|
116
|
+
return true
|
117
|
+
end
|
118
|
+
if ua.index("labs.topsy.com/butterfly/")
|
119
|
+
update_map(result, Woothee::DataSet.get("topsyButterfly"))
|
120
|
+
return true
|
121
|
+
end
|
122
|
+
if ua.index("rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot")
|
123
|
+
update_map(result, Woothee::DataSet.get("rogerbot"))
|
124
|
+
return true
|
125
|
+
end
|
126
|
+
if ua.index("compatible; AhrefsBot/")
|
127
|
+
update_map(result, Woothee::DataSet.get("AhrefsBot"))
|
128
|
+
return true
|
129
|
+
end
|
106
130
|
if ua.index('livedoor FeedFetcher') or ua.index('Fastladder FeedFetcher')
|
107
131
|
update_map(result, Woothee::DataSet.get('livedoorFeedFetcher'))
|
108
132
|
return true
|
@@ -113,7 +137,7 @@ module Woothee::Crawler
|
|
113
137
|
return true
|
114
138
|
end
|
115
139
|
end
|
116
|
-
if ua.index('mixi-check') or ua.index('mixi-news-crawler')
|
140
|
+
if ua.index('mixi-check') or ua.index('mixi-crawler') or ua.index('mixi-news-crawler')
|
117
141
|
update_map(result, Woothee::DataSet.get('mixi'))
|
118
142
|
return true
|
119
143
|
end
|
@@ -128,7 +152,7 @@ module Woothee::Crawler
|
|
128
152
|
end
|
129
153
|
|
130
154
|
def self.challenge_maybe_crawler(ua, result)
|
131
|
-
if ua =~ /bot(?:[-_ .\/;@()]|$)/oi
|
155
|
+
if ua =~ /(bot|crawler|spider)(?:[-_ .\/;@()]|$)/oi
|
132
156
|
update_map(result, Woothee::DataSet.get('VariousCrawler'))
|
133
157
|
return true
|
134
158
|
end
|
data/lib/woothee/dataset.rb
CHANGED
@@ -33,7 +33,7 @@ end
|
|
33
33
|
|
34
34
|
module Woothee::DataSet
|
35
35
|
DATASET = {}
|
36
|
-
# GENERATED from dataset.yaml at Mon
|
36
|
+
# GENERATED from dataset.yaml at Mon Jun 10 18:14:36 JST 2013 by tagomoris
|
37
37
|
obj = {:label => 'MSIE', :name => 'Internet Explorer', :type => :browser}
|
38
38
|
obj[:vendor] = 'Microsoft'
|
39
39
|
DATASET[obj[:label]] = obj
|
@@ -265,10 +265,25 @@ module Woothee::DataSet
|
|
265
265
|
obj = {:label => 'ApplePubSub', :name => 'Apple iCloud', :type => :full}
|
266
266
|
obj[:category] = :crawler
|
267
267
|
DATASET[obj[:label]] = obj
|
268
|
+
obj = {:label => 'Genieo', :name => 'Genieo Web Filter', :type => :full}
|
269
|
+
obj[:category] = :crawler
|
270
|
+
DATASET[obj[:label]] = obj
|
271
|
+
obj = {:label => 'topsyButterfly', :name => 'topsy Butterfly', :type => :full}
|
272
|
+
obj[:category] = :crawler
|
273
|
+
DATASET[obj[:label]] = obj
|
274
|
+
obj = {:label => 'rogerbot', :name => 'SeoMoz rogerbot', :type => :full}
|
275
|
+
obj[:category] = :crawler
|
276
|
+
DATASET[obj[:label]] = obj
|
277
|
+
obj = {:label => 'AhrefsBot', :name => 'ahref AhrefsBot', :type => :full}
|
278
|
+
obj[:category] = :crawler
|
279
|
+
DATASET[obj[:label]] = obj
|
280
|
+
obj = {:label => 'radian6', :name => 'salesforce radian6', :type => :full}
|
281
|
+
obj[:category] = :crawler
|
282
|
+
DATASET[obj[:label]] = obj
|
268
283
|
obj = {:label => 'Hatena', :name => 'Hatena', :type => :full}
|
269
284
|
obj[:category] = :crawler
|
270
285
|
DATASET[obj[:label]] = obj
|
271
|
-
obj = {:label => '
|
286
|
+
obj = {:label => 'goo', :name => 'goo', :type => :full}
|
272
287
|
obj[:category] = :crawler
|
273
288
|
DATASET[obj[:label]] = obj
|
274
289
|
obj = {:label => 'livedoorFeedFetcher', :name => 'livedoor FeedFetcher', :type => :full}
|
data/lib/woothee/misc.rb
CHANGED
@@ -32,17 +32,19 @@ module Woothee::Misc
|
|
32
32
|
|
33
33
|
def self.challenge_http_library(ua, result)
|
34
34
|
data,version = case
|
35
|
-
when ua =~ /^(?:Apache-HttpClient\/|Jakarta Commons-HttpClient\/|Java\/)/o
|
35
|
+
when ua =~ /^(?:Apache-HttpClient\/|Jakarta Commons-HttpClient\/|Java\/)/o || ua =~ /[- ]HttpClient(\/|$)/o
|
36
|
+
[Woothee::DataSet.get('HTTPLibrary'), 'Java']
|
37
|
+
when ua.index('Java(TM) 2 Runtime Environment,')
|
36
38
|
[Woothee::DataSet.get('HTTPLibrary'), 'Java']
|
37
39
|
when ua =~ /^Wget/o
|
38
40
|
[Woothee::DataSet.get('HTTPLibrary'), 'wget']
|
39
41
|
when ua =~ /^(?:libwww-perl|WWW-Mechanize|LWP::Simple|LWP |lwp-trivial)/o
|
40
42
|
[Woothee::DataSet.get('HTTPLibrary'), 'perl']
|
41
|
-
when ua =~ /^
|
43
|
+
when ua =~ /^(?:Ruby|feedzirra|Typhoeus)/o
|
44
|
+
[Woothee::DataSet.get('HTTPLibrary'), 'ruby']
|
45
|
+
when ua =~ /^(Python-urllib\/|Twisted )/o
|
42
46
|
[Woothee::DataSet.get('HTTPLibrary'), 'python']
|
43
|
-
when ua =~ /^(
|
44
|
-
[Woothee::DataSet.get('HTTPLibrary'), 'php']
|
45
|
-
when ua.index('PEAR HTTP_Request class;')
|
47
|
+
when ua =~ /^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:\/| |$)/o || ua =~ /(?:PEAR |)HTTP_Request(?: class|2)/o
|
46
48
|
[Woothee::DataSet.get('HTTPLibrary'), 'php']
|
47
49
|
else [nil,nil]
|
48
50
|
end
|
@@ -54,7 +56,9 @@ module Woothee::Misc
|
|
54
56
|
end
|
55
57
|
|
56
58
|
def self.challenge_maybe_rss_reader(ua, result)
|
57
|
-
data = if ua =~ /rss(?:reader|bar|[-_ \/;()])/oi
|
59
|
+
data = if ua =~ /rss(?:reader|bar|[-_ \/;()]|[ +]*\/)/oi || ua =~ /headline-reader/oi
|
60
|
+
Woothee::DataSet.get('VariousRSSReader')
|
61
|
+
elsif ua.index('cococ/')
|
58
62
|
Woothee::DataSet.get('VariousRSSReader')
|
59
63
|
else
|
60
64
|
nil
|
data/woothee.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |gem|
|
|
6
6
|
gem.description = "Cross-language UserAgent classifier library, ruby implementation"
|
7
7
|
gem.homepage = "https://github.com/tagomoris/woothee"
|
8
8
|
gem.summary = gem.description
|
9
|
-
gem.version = "0.3.
|
9
|
+
gem.version = "0.3.2"
|
10
10
|
gem.authors = ["TAGOMORI Satoshi"]
|
11
11
|
gem.email = "tagomoris@gmail.com"
|
12
12
|
gem.has_rdoc = false
|
metadata
CHANGED
@@ -1,30 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: woothee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- TAGOMORI Satoshi
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-06-10 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 2.8.0
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 2.8.0
|
30
27
|
description: Cross-language UserAgent classifier library, ruby implementation
|
@@ -51,27 +48,26 @@ files:
|
|
51
48
|
- woothee.gemspec
|
52
49
|
homepage: https://github.com/tagomoris/woothee
|
53
50
|
licenses: []
|
51
|
+
metadata: {}
|
54
52
|
post_install_message:
|
55
53
|
rdoc_options: []
|
56
54
|
require_paths:
|
57
55
|
- lib
|
58
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
57
|
requirements:
|
61
|
-
- -
|
58
|
+
- - '>='
|
62
59
|
- !ruby/object:Gem::Version
|
63
60
|
version: '0'
|
64
61
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
62
|
requirements:
|
67
|
-
- -
|
63
|
+
- - '>='
|
68
64
|
- !ruby/object:Gem::Version
|
69
65
|
version: '0'
|
70
66
|
requirements: []
|
71
67
|
rubyforge_project:
|
72
|
-
rubygems_version:
|
68
|
+
rubygems_version: 2.0.2
|
73
69
|
signing_key:
|
74
|
-
specification_version:
|
70
|
+
specification_version: 4
|
75
71
|
summary: Cross-language UserAgent classifier library, ruby implementation
|
76
72
|
test_files:
|
77
73
|
- spec/00_valid_spec.rb
|