woothee 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +1 -1
- data/lib/woothee/appliance.rb +1 -1
- data/lib/woothee/browser.rb +3 -1
- data/lib/woothee/crawler.rb +30 -6
- data/lib/woothee/dataset.rb +17 -2
- data/lib/woothee/misc.rb +10 -6
- data/woothee.gemspec +1 -1
- metadata +9 -13
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 13da5f2dcefa4793ff7a8868504db2965fe0dc86
|
4
|
+
data.tar.gz: a57fad8bf90c80c9a21d840a6304a1b75b762293
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: a678ca5fbe5b9731c34b895c1831a8fdbf99ba370c73bd6e791b66da12b8fe8fa95b585cddc434f9ff3ba8112847be51400bc0f9221c18e2e7e1a09ba828539b
|
7
|
+
data.tar.gz: 7ef9e2d99119fb99861cefe8e1c7306ddeb625845db13459ee3c61d52fce9543d17546c6fc2e7023041ee58a71d7cb2f166d7b4307a095a0f87d4c0b26b5e406
|
data/Gemfile
CHANGED
data/lib/woothee/appliance.rb
CHANGED
@@ -10,7 +10,7 @@ module Woothee::Appliance
|
|
10
10
|
data = case
|
11
11
|
when ua.index('PSP (PlayStation Portable);') then Woothee::DataSet.get('PSP')
|
12
12
|
when ua.index('PlayStation Vita') then Woothee::DataSet.get('PSVita')
|
13
|
-
when ua.index('PLAYSTATION 3;') then Woothee::DataSet.get('PS3')
|
13
|
+
when ua.index('PLAYSTATION 3 ') || ua.index('PLAYSTATION 3;') then Woothee::DataSet.get('PS3')
|
14
14
|
else nil
|
15
15
|
end
|
16
16
|
return false unless data
|
data/lib/woothee/browser.rb
CHANGED
@@ -57,7 +57,9 @@ module Woothee::Browser
|
|
57
57
|
def self.challenge_opera(ua, result)
|
58
58
|
return false if ua.index('Opera').nil?
|
59
59
|
|
60
|
-
version = if ua =~ /
|
60
|
+
version = if ua =~ /Version\/([.0-9]+)/o
|
61
|
+
$1
|
62
|
+
elsif ua =~ /Opera[\/ ]([.0-9]+)/o
|
61
63
|
$1
|
62
64
|
else
|
63
65
|
Woothee::VALUE_UNKNOWN
|
data/lib/woothee/crawler.rb
CHANGED
@@ -45,8 +45,8 @@ module Woothee::Crawler
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def self.challenge_crawlers(ua, result)
|
48
|
-
if ua.index('Yahoo') or ua.index('listing.yahoo.co.jp/support/faq/')
|
49
|
-
if ua.index('compatible; Yahoo! Slurp
|
48
|
+
if ua.index('Yahoo') or ua.index('help.yahoo.co.jp/help/jp/') or ua.index('listing.yahoo.co.jp/support/faq/')
|
49
|
+
if ua.index('compatible; Yahoo! Slurp')
|
50
50
|
update_map(result, Woothee::DataSet.get('YahooSlurp'))
|
51
51
|
return true
|
52
52
|
end
|
@@ -54,7 +54,7 @@ module Woothee::Crawler
|
|
54
54
|
update_map(result, Woothee::DataSet.get('YahooJP'))
|
55
55
|
return true
|
56
56
|
end
|
57
|
-
if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/')
|
57
|
+
if ua.index('crawler (http://listing.yahoo.co.jp/support/faq/') || ua.index('crawler (http://help.yahoo.co.jp/help/jp/')
|
58
58
|
update_map(result, Woothee::DataSet.get('YahooJP'))
|
59
59
|
return true
|
60
60
|
end
|
@@ -95,14 +95,38 @@ module Woothee::Crawler
|
|
95
95
|
end
|
96
96
|
if ua.index('ichiro')
|
97
97
|
if ua.index('http://help.goo.ne.jp/door/crawler.html') or ua.index('compatible; ichiro/mobile goo;')
|
98
|
-
update_map(result, Woothee::DataSet.get('
|
98
|
+
update_map(result, Woothee::DataSet.get('goo'))
|
99
99
|
return true
|
100
100
|
end
|
101
101
|
end
|
102
|
+
if ua.index('gooblogsearch/')
|
103
|
+
update_map(result, Woothee::DataSet.get('goo'))
|
104
|
+
return true
|
105
|
+
end
|
102
106
|
if ua.index('Apple-PubSub')
|
103
107
|
update_map(result, Woothee::DataSet.get('ApplePubSub'))
|
104
108
|
return true
|
105
109
|
end
|
110
|
+
if ua.index("(www.radian6.com/crawler)")
|
111
|
+
update_map(result, Woothee::DataSet.get("radian6"))
|
112
|
+
return true
|
113
|
+
end
|
114
|
+
if ua.index('Genieo/')
|
115
|
+
update_map(result, Woothee::DataSet.get("Genieo"))
|
116
|
+
return true
|
117
|
+
end
|
118
|
+
if ua.index("labs.topsy.com/butterfly/")
|
119
|
+
update_map(result, Woothee::DataSet.get("topsyButterfly"))
|
120
|
+
return true
|
121
|
+
end
|
122
|
+
if ua.index("rogerbot/1.0 (http://www.seomoz.org/dp/rogerbot")
|
123
|
+
update_map(result, Woothee::DataSet.get("rogerbot"))
|
124
|
+
return true
|
125
|
+
end
|
126
|
+
if ua.index("compatible; AhrefsBot/")
|
127
|
+
update_map(result, Woothee::DataSet.get("AhrefsBot"))
|
128
|
+
return true
|
129
|
+
end
|
106
130
|
if ua.index('livedoor FeedFetcher') or ua.index('Fastladder FeedFetcher')
|
107
131
|
update_map(result, Woothee::DataSet.get('livedoorFeedFetcher'))
|
108
132
|
return true
|
@@ -113,7 +137,7 @@ module Woothee::Crawler
|
|
113
137
|
return true
|
114
138
|
end
|
115
139
|
end
|
116
|
-
if ua.index('mixi-check') or ua.index('mixi-news-crawler')
|
140
|
+
if ua.index('mixi-check') or ua.index('mixi-crawler') or ua.index('mixi-news-crawler')
|
117
141
|
update_map(result, Woothee::DataSet.get('mixi'))
|
118
142
|
return true
|
119
143
|
end
|
@@ -128,7 +152,7 @@ module Woothee::Crawler
|
|
128
152
|
end
|
129
153
|
|
130
154
|
def self.challenge_maybe_crawler(ua, result)
|
131
|
-
if ua =~ /bot(?:[-_ .\/;@()]|$)/oi
|
155
|
+
if ua =~ /(bot|crawler|spider)(?:[-_ .\/;@()]|$)/oi
|
132
156
|
update_map(result, Woothee::DataSet.get('VariousCrawler'))
|
133
157
|
return true
|
134
158
|
end
|
data/lib/woothee/dataset.rb
CHANGED
@@ -33,7 +33,7 @@ end
|
|
33
33
|
|
34
34
|
module Woothee::DataSet
|
35
35
|
DATASET = {}
|
36
|
-
# GENERATED from dataset.yaml at Mon
|
36
|
+
# GENERATED from dataset.yaml at Mon Jun 10 18:14:36 JST 2013 by tagomoris
|
37
37
|
obj = {:label => 'MSIE', :name => 'Internet Explorer', :type => :browser}
|
38
38
|
obj[:vendor] = 'Microsoft'
|
39
39
|
DATASET[obj[:label]] = obj
|
@@ -265,10 +265,25 @@ module Woothee::DataSet
|
|
265
265
|
obj = {:label => 'ApplePubSub', :name => 'Apple iCloud', :type => :full}
|
266
266
|
obj[:category] = :crawler
|
267
267
|
DATASET[obj[:label]] = obj
|
268
|
+
obj = {:label => 'Genieo', :name => 'Genieo Web Filter', :type => :full}
|
269
|
+
obj[:category] = :crawler
|
270
|
+
DATASET[obj[:label]] = obj
|
271
|
+
obj = {:label => 'topsyButterfly', :name => 'topsy Butterfly', :type => :full}
|
272
|
+
obj[:category] = :crawler
|
273
|
+
DATASET[obj[:label]] = obj
|
274
|
+
obj = {:label => 'rogerbot', :name => 'SeoMoz rogerbot', :type => :full}
|
275
|
+
obj[:category] = :crawler
|
276
|
+
DATASET[obj[:label]] = obj
|
277
|
+
obj = {:label => 'AhrefsBot', :name => 'ahref AhrefsBot', :type => :full}
|
278
|
+
obj[:category] = :crawler
|
279
|
+
DATASET[obj[:label]] = obj
|
280
|
+
obj = {:label => 'radian6', :name => 'salesforce radian6', :type => :full}
|
281
|
+
obj[:category] = :crawler
|
282
|
+
DATASET[obj[:label]] = obj
|
268
283
|
obj = {:label => 'Hatena', :name => 'Hatena', :type => :full}
|
269
284
|
obj[:category] = :crawler
|
270
285
|
DATASET[obj[:label]] = obj
|
271
|
-
obj = {:label => '
|
286
|
+
obj = {:label => 'goo', :name => 'goo', :type => :full}
|
272
287
|
obj[:category] = :crawler
|
273
288
|
DATASET[obj[:label]] = obj
|
274
289
|
obj = {:label => 'livedoorFeedFetcher', :name => 'livedoor FeedFetcher', :type => :full}
|
data/lib/woothee/misc.rb
CHANGED
@@ -32,17 +32,19 @@ module Woothee::Misc
|
|
32
32
|
|
33
33
|
def self.challenge_http_library(ua, result)
|
34
34
|
data,version = case
|
35
|
-
when ua =~ /^(?:Apache-HttpClient\/|Jakarta Commons-HttpClient\/|Java\/)/o
|
35
|
+
when ua =~ /^(?:Apache-HttpClient\/|Jakarta Commons-HttpClient\/|Java\/)/o || ua =~ /[- ]HttpClient(\/|$)/o
|
36
|
+
[Woothee::DataSet.get('HTTPLibrary'), 'Java']
|
37
|
+
when ua.index('Java(TM) 2 Runtime Environment,')
|
36
38
|
[Woothee::DataSet.get('HTTPLibrary'), 'Java']
|
37
39
|
when ua =~ /^Wget/o
|
38
40
|
[Woothee::DataSet.get('HTTPLibrary'), 'wget']
|
39
41
|
when ua =~ /^(?:libwww-perl|WWW-Mechanize|LWP::Simple|LWP |lwp-trivial)/o
|
40
42
|
[Woothee::DataSet.get('HTTPLibrary'), 'perl']
|
41
|
-
when ua =~ /^
|
43
|
+
when ua =~ /^(?:Ruby|feedzirra|Typhoeus)/o
|
44
|
+
[Woothee::DataSet.get('HTTPLibrary'), 'ruby']
|
45
|
+
when ua =~ /^(Python-urllib\/|Twisted )/o
|
42
46
|
[Woothee::DataSet.get('HTTPLibrary'), 'python']
|
43
|
-
when ua =~ /^(
|
44
|
-
[Woothee::DataSet.get('HTTPLibrary'), 'php']
|
45
|
-
when ua.index('PEAR HTTP_Request class;')
|
47
|
+
when ua =~ /^(?:PHP|WordPress|CakePHP|PukiWiki|PECL::HTTP)(?:\/| |$)/o || ua =~ /(?:PEAR |)HTTP_Request(?: class|2)/o
|
46
48
|
[Woothee::DataSet.get('HTTPLibrary'), 'php']
|
47
49
|
else [nil,nil]
|
48
50
|
end
|
@@ -54,7 +56,9 @@ module Woothee::Misc
|
|
54
56
|
end
|
55
57
|
|
56
58
|
def self.challenge_maybe_rss_reader(ua, result)
|
57
|
-
data = if ua =~ /rss(?:reader|bar|[-_ \/;()])/oi
|
59
|
+
data = if ua =~ /rss(?:reader|bar|[-_ \/;()]|[ +]*\/)/oi || ua =~ /headline-reader/oi
|
60
|
+
Woothee::DataSet.get('VariousRSSReader')
|
61
|
+
elsif ua.index('cococ/')
|
58
62
|
Woothee::DataSet.get('VariousRSSReader')
|
59
63
|
else
|
60
64
|
nil
|
data/woothee.gemspec
CHANGED
@@ -6,7 +6,7 @@ Gem::Specification.new do |gem|
|
|
6
6
|
gem.description = "Cross-language UserAgent classifier library, ruby implementation"
|
7
7
|
gem.homepage = "https://github.com/tagomoris/woothee"
|
8
8
|
gem.summary = gem.description
|
9
|
-
gem.version = "0.3.0"
|
9
|
+
gem.version = "0.3.2"
|
10
10
|
gem.authors = ["TAGOMORI Satoshi"]
|
11
11
|
gem.email = "tagomoris@gmail.com"
|
12
12
|
gem.has_rdoc = false
|
metadata
CHANGED
@@ -1,30 +1,27 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: woothee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
5
|
-
prerelease:
|
4
|
+
version: 0.3.2
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- TAGOMORI Satoshi
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date:
|
11
|
+
date: 2013-06-10 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: rspec
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: 2.8.0
|
22
20
|
type: :development
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: 2.8.0
|
30
27
|
description: Cross-language UserAgent classifier library, ruby implementation
|
@@ -51,27 +48,26 @@ files:
|
|
51
48
|
- woothee.gemspec
|
52
49
|
homepage: https://github.com/tagomoris/woothee
|
53
50
|
licenses: []
|
51
|
+
metadata: {}
|
54
52
|
post_install_message:
|
55
53
|
rdoc_options: []
|
56
54
|
require_paths:
|
57
55
|
- lib
|
58
56
|
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
-
none: false
|
60
57
|
requirements:
|
61
|
-
- -
|
58
|
+
- - '>='
|
62
59
|
- !ruby/object:Gem::Version
|
63
60
|
version: '0'
|
64
61
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
-
none: false
|
66
62
|
requirements:
|
67
|
-
- -
|
63
|
+
- - '>='
|
68
64
|
- !ruby/object:Gem::Version
|
69
65
|
version: '0'
|
70
66
|
requirements: []
|
71
67
|
rubyforge_project:
|
72
|
-
rubygems_version:
|
68
|
+
rubygems_version: 2.0.2
|
73
69
|
signing_key:
|
74
|
-
specification_version:
|
70
|
+
specification_version: 4
|
75
71
|
summary: Cross-language UserAgent classifier library, ruby implementation
|
76
72
|
test_files:
|
77
73
|
- spec/00_valid_spec.rb
|