device_detector 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +14 -2
- data/lib/device_detector.rb +13 -2
- data/lib/device_detector/bot.rb +15 -0
- data/lib/device_detector/parser.rb +4 -1
- data/lib/device_detector/version.rb +1 -1
- data/regexes/bots.yml +1002 -0
- data/spec/device_detector_spec.rb +94 -0
- metadata +3 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3b1d7d2d1f51f584026667fa8fff86b9d4cf994d
|
4
|
+
data.tar.gz: dc71e14d78d7432fd03ea583fbaaf039a1aa3242
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 956b2f1762edd6106dd96a8c920b88d0e1b323552790ba46d533063028802c4ca91f739a4c49d95a4ade4f273f1abad48a2a73781177ccdfd776857c45e087cc
|
7
|
+
data.tar.gz: 49a4bbc2a254452296f155e11837772afb3a4399308a18eb035c98ab0f7ead38ab850f0650b802043c4701afe077788ebae5a1ae33d52e4a2d2d6926281f6de6
|
data/README.md
CHANGED
@@ -1,8 +1,20 @@
|
|
1
1
|
# DeviceDetector
|
2
2
|
|
3
|
-
This is a Ruby port of the Universal Device Detection library.
|
3
|
+
This is a Ruby port of the Universal Device Detection library.
|
4
|
+
You can find the original code here: <https://github.com/piwik/device-detector>.
|
4
5
|
|
5
|
-
The Universal Device Detection library will parse any User Agent and detect
|
6
|
+
The Universal Device Detection library will parse any User Agent and detect
|
7
|
+
the browser, operating system, device used (desktop, tablet, mobile, tv, cars,
|
8
|
+
console, etc.), brand and model.
|
9
|
+
|
10
|
+
## Disclaimer
|
11
|
+
|
12
|
+
This port does not aspire to be a one-to-one copy from the original code, but
|
13
|
+
rather an adaptation for the Ruby language.
|
14
|
+
|
15
|
+
Still, our goal is to use the original, unchanged regex yaml files, in order to
|
16
|
+
mutually benefit from updates and pull requests to both the original and the
|
17
|
+
ported versions.
|
6
18
|
|
7
19
|
## Installation
|
8
20
|
|
data/lib/device_detector.rb
CHANGED
@@ -6,13 +6,12 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
6
6
|
require 'device_detector/version'
|
7
7
|
require 'device_detector/version_extractor'
|
8
8
|
require 'device_detector/parser'
|
9
|
+
require 'device_detector/bot'
|
9
10
|
require 'device_detector/client'
|
10
11
|
require 'device_detector/os'
|
11
12
|
|
12
13
|
class DeviceDetector
|
13
14
|
|
14
|
-
ROOT = Pathname.new(File.expand_path('../..', __FILE__))
|
15
|
-
|
16
15
|
attr_reader :user_agent
|
17
16
|
|
18
17
|
def initialize(user_agent)
|
@@ -39,8 +38,20 @@ class DeviceDetector
|
|
39
38
|
client.known?
|
40
39
|
end
|
41
40
|
|
41
|
+
def bot?
|
42
|
+
bot.bot?
|
43
|
+
end
|
44
|
+
|
45
|
+
def bot_name
|
46
|
+
bot.name
|
47
|
+
end
|
48
|
+
|
42
49
|
private
|
43
50
|
|
51
|
+
def bot
|
52
|
+
@bot ||= Bot.new(user_agent)
|
53
|
+
end
|
54
|
+
|
44
55
|
def client
|
45
56
|
@client ||= Client.new(user_agent)
|
46
57
|
end
|
@@ -29,10 +29,13 @@ class DeviceDetector
|
|
29
29
|
|
30
30
|
def filepaths
|
31
31
|
filenames.map do |filename|
|
32
|
-
File.join(
|
32
|
+
File.join(root, 'regexes', filename)
|
33
33
|
end
|
34
34
|
end
|
35
35
|
|
36
|
+
def root
|
37
|
+
Pathname.new(File.expand_path('../../..', __FILE__))
|
38
|
+
end
|
36
39
|
|
37
40
|
end
|
38
41
|
end
|
data/regexes/bots.yml
ADDED
@@ -0,0 +1,1002 @@
|
|
1
|
+
###############
|
2
|
+
# Device Detector - The Universal Device Detection library for parsing User Agents
|
3
|
+
#
|
4
|
+
# @link http://piwik.org
|
5
|
+
# @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
|
6
|
+
###############
|
7
|
+
|
8
|
+
- regex: '360Spider(-Image|-Video)?'
|
9
|
+
name: '360Spider'
|
10
|
+
category: 'Search bot'
|
11
|
+
url: 'http://www.so.com/help/help_3_2.html'
|
12
|
+
producer:
|
13
|
+
name: 'Online Media Group, Inc.'
|
14
|
+
url: ''
|
15
|
+
|
16
|
+
- regex: 'Aboundex'
|
17
|
+
name: 'Aboundexbot'
|
18
|
+
category: 'Search bot'
|
19
|
+
url: 'http://www.aboundex.com/crawler/'
|
20
|
+
producer:
|
21
|
+
name: 'Aboundex.com'
|
22
|
+
url: 'http://www.aboundex.com'
|
23
|
+
|
24
|
+
- regex: 'AcoonBot'
|
25
|
+
name: 'Acoon'
|
26
|
+
category: 'Search bot'
|
27
|
+
url: 'http://www.acoon.de/robot.asp'
|
28
|
+
producer:
|
29
|
+
name: 'Acoon GmbH'
|
30
|
+
url: 'http://www.acoon.de'
|
31
|
+
|
32
|
+
- regex: 'AddThis.com'
|
33
|
+
name: 'AddThis.com'
|
34
|
+
category: 'Social Media Agent'
|
35
|
+
url: ''
|
36
|
+
producer:
|
37
|
+
name: 'Clearspring Technologies, Inc.'
|
38
|
+
url: 'http://www.clearspring.com'
|
39
|
+
|
40
|
+
- regex: 'AhrefsBot'
|
41
|
+
name: 'aHrefs Bot'
|
42
|
+
category: 'Crawler'
|
43
|
+
url: 'http://ahrefs.com/robot'
|
44
|
+
producer:
|
45
|
+
name: 'Ahrefs Pte Ltd'
|
46
|
+
url: 'http://ahrefs.com/robot'
|
47
|
+
|
48
|
+
- regex: 'ia_archiver|alexabot|verifybot'
|
49
|
+
name: 'Alexa Crawler'
|
50
|
+
category: 'Search bot'
|
51
|
+
url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
|
52
|
+
producer:
|
53
|
+
name: 'Alexa Internet'
|
54
|
+
url: 'http://www.alexa.com'
|
55
|
+
|
56
|
+
- regex: 'AmorankSpider'
|
57
|
+
name: 'Amorank Spider'
|
58
|
+
category: 'Crawler'
|
59
|
+
url: 'http://amorank.com/webcrawler.html'
|
60
|
+
producer:
|
61
|
+
name: 'Amorank'
|
62
|
+
url: 'http://www.amorank.com'
|
63
|
+
|
64
|
+
- regex: 'Curious George'
|
65
|
+
name: 'Analytics SEO Crawler'
|
66
|
+
category: 'Crawler'
|
67
|
+
url: 'http://www.analyticsseo.com/crawler'
|
68
|
+
producer:
|
69
|
+
name: 'Analytics SEO'
|
70
|
+
url: 'http://www.analyticsseo.com'
|
71
|
+
|
72
|
+
- regex: 'archive.org_bot|special_archiver'
|
73
|
+
name: 'archive.org bot'
|
74
|
+
category: 'Crawler'
|
75
|
+
url: 'http://www.archive.org/details/archive.org_bot'
|
76
|
+
producer:
|
77
|
+
name: 'The Internet Archive'
|
78
|
+
url: 'http://www.archive.org'
|
79
|
+
|
80
|
+
- regex: 'Ask Jeeves/Teoma'
|
81
|
+
name: 'Ask Jeeves'
|
82
|
+
category: 'Search bot'
|
83
|
+
url: ''
|
84
|
+
producer:
|
85
|
+
name: 'Ask Jeeves Inc.'
|
86
|
+
url: 'http://www.ask.com'
|
87
|
+
|
88
|
+
- regex: 'Backlink-Ceck.de'
|
89
|
+
name: 'Backlink-Ceck.de'
|
90
|
+
category: 'Crawler'
|
91
|
+
url: 'http://www.backlink-check.de/bot.html'
|
92
|
+
producer:
|
93
|
+
name: 'Mediagreen Medienservice'
|
94
|
+
url: 'http://www.backlink-check.de'
|
95
|
+
|
96
|
+
- regex: 'BacklinkCrawler'
|
97
|
+
name: 'BacklinkCrawler'
|
98
|
+
category: 'Crawler'
|
99
|
+
url: 'http://www.backlinktest.com/crawler.html'
|
100
|
+
producer:
|
101
|
+
name: '2.0Promotion GbR'
|
102
|
+
url: 'http://www.backlinktest.com'
|
103
|
+
|
104
|
+
- regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
|
105
|
+
name: 'Baidu Spider'
|
106
|
+
category: 'Search bot'
|
107
|
+
url: 'http://www.baidu.com/search/spider.htm'
|
108
|
+
producer:
|
109
|
+
name: 'Baidu'
|
110
|
+
url: 'http://www.baidu.com'
|
111
|
+
|
112
|
+
- regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
|
113
|
+
name: 'BingBot'
|
114
|
+
category: 'Search bot'
|
115
|
+
url: 'http://search.msn.com/msnbot.htmn'
|
116
|
+
producer:
|
117
|
+
name: 'Microsoft Corporation'
|
118
|
+
url: 'http://www.microsoft.com'
|
119
|
+
|
120
|
+
- regex: 'Blekkobot'
|
121
|
+
name: 'Blekkobot'
|
122
|
+
category: 'Search bot'
|
123
|
+
url: 'http://blekko.com/about/blekkobot'
|
124
|
+
producer:
|
125
|
+
name: 'Blekko'
|
126
|
+
url: 'http://blekko.com'
|
127
|
+
|
128
|
+
- regex: 'BLEXBot(Test)?'
|
129
|
+
name: 'BLEXBot Crawler'
|
130
|
+
category: 'Crawler'
|
131
|
+
url: 'http://webmeup-crawler.com'
|
132
|
+
producer:
|
133
|
+
name: 'WebMeUp'
|
134
|
+
url: 'http://webmeup.com'
|
135
|
+
|
136
|
+
- regex: 'Bloglovin'
|
137
|
+
name: 'Bloglovin'
|
138
|
+
url: 'http://www.bloglovin.com'
|
139
|
+
category: 'Feed Fetcher'
|
140
|
+
producer:
|
141
|
+
name: ''
|
142
|
+
url: ''
|
143
|
+
|
144
|
+
- regex: 'BountiiBot'
|
145
|
+
name: 'Bountii Bot'
|
146
|
+
category: 'Search bot'
|
147
|
+
url: 'http://bountii.com/contact.php'
|
148
|
+
producer:
|
149
|
+
name: 'Bountii Inc.'
|
150
|
+
url: 'http://bountii.com'
|
151
|
+
|
152
|
+
- regex: 'Browsershots'
|
153
|
+
name: 'Browsershots'
|
154
|
+
category: 'Service Agent'
|
155
|
+
url: 'http://browsershots.org/faq'
|
156
|
+
producer:
|
157
|
+
name: 'Browsershots.org'
|
158
|
+
url: 'http://browsershots.org'
|
159
|
+
|
160
|
+
- regex: '(?<!HTC)[ _]Butterfly'
|
161
|
+
name: 'Butterfly Robot'
|
162
|
+
category: 'Search bot'
|
163
|
+
url: 'http://labs.topsy.com/butterfly'
|
164
|
+
producer:
|
165
|
+
name: 'Topsy Labs'
|
166
|
+
url: 'http://labs.topsy.com'
|
167
|
+
|
168
|
+
- regex: 'CareerBot'
|
169
|
+
name: 'CareerBot'
|
170
|
+
category: 'Crawler'
|
171
|
+
url: 'http://www.career-x.de/bot.html'
|
172
|
+
producer:
|
173
|
+
name: 'career-x GmbH'
|
174
|
+
url: 'http://www.career-x.de'
|
175
|
+
|
176
|
+
- regex: 'CCBot'
|
177
|
+
name: 'ccBot crawler'
|
178
|
+
category: 'Crawler'
|
179
|
+
url: 'http://commoncrawl.org/faq/'
|
180
|
+
producer:
|
181
|
+
name: 'reddit inc.'
|
182
|
+
url: 'http://www.reddit.com'
|
183
|
+
|
184
|
+
- regex: 'Cliqzbot'
|
185
|
+
name: 'Cliqzbot'
|
186
|
+
category: 'Crawler'
|
187
|
+
url: 'http://cliqz.com/company/cliqzbot'
|
188
|
+
producer:
|
189
|
+
name: '10betterpages GmbH'
|
190
|
+
url: 'http://cliqz.com'
|
191
|
+
|
192
|
+
- regex: 'CloudFlare-AlwaysOnline'
|
193
|
+
name: 'CloudFlare Always Online'
|
194
|
+
category: 'Site Monitor'
|
195
|
+
url: 'http://www.cloudflare.com/always-online'
|
196
|
+
producer:
|
197
|
+
name: 'CloudFlare'
|
198
|
+
url: 'http://www.cloudflare.com'
|
199
|
+
|
200
|
+
- regex: 'CommaFeed'
|
201
|
+
name: 'CommaFeed'
|
202
|
+
url: 'http://www.commafeed.com'
|
203
|
+
category: 'Feed Fetcher'
|
204
|
+
producer:
|
205
|
+
name: ''
|
206
|
+
url: ''
|
207
|
+
|
208
|
+
- regex: 'Dazoobot'
|
209
|
+
name: 'Dazoobot'
|
210
|
+
category: 'Search bot'
|
211
|
+
url: ''
|
212
|
+
producer:
|
213
|
+
name: 'DAZOO.FR'
|
214
|
+
url: 'http://dazoo.fr'
|
215
|
+
|
216
|
+
- regex: 'discobot(-news)?'
|
217
|
+
name: 'Discobot'
|
218
|
+
category: 'Search bot'
|
219
|
+
url: 'http://discoveryengine.com/discobot.html'
|
220
|
+
producer:
|
221
|
+
name: 'Discovery Engine'
|
222
|
+
url: 'http://discoveryengine.com'
|
223
|
+
|
224
|
+
- regex: 'DotBot'
|
225
|
+
name: 'DotBot'
|
226
|
+
category: 'Crawler'
|
227
|
+
url: 'http://www.opensiteexplorer.org/dotbot'
|
228
|
+
producer:
|
229
|
+
name: 'SEOmoz, Inc.'
|
230
|
+
url: 'http://moz.com/'
|
231
|
+
|
232
|
+
- regex: 'EasouSpider'
|
233
|
+
name: 'Easou Spider'
|
234
|
+
category: 'Search bot'
|
235
|
+
url: 'http://www.easou.com/search/spider.html'
|
236
|
+
producer:
|
237
|
+
name: 'easou ICP'
|
238
|
+
url: 'http://www.easou.com'
|
239
|
+
|
240
|
+
- regex: 'EMail Exractor'
|
241
|
+
name: 'EMail Exractor'
|
242
|
+
category: 'Crawler'
|
243
|
+
url: ''
|
244
|
+
producer:
|
245
|
+
name: ''
|
246
|
+
url: ''
|
247
|
+
|
248
|
+
- regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
|
249
|
+
name: 'ExaBot'
|
250
|
+
category: 'Crawler'
|
251
|
+
url: 'http://www.exabot.com/go/robot'
|
252
|
+
producer:
|
253
|
+
name: 'Dassault Systèmes'
|
254
|
+
url: 'http://www.3ds.com'
|
255
|
+
|
256
|
+
- regex: 'ExactSeek Crawler'
|
257
|
+
name: 'ExactSeek Crawler'
|
258
|
+
category: 'Search bot'
|
259
|
+
url: 'http://www.exactseek.com'
|
260
|
+
producer:
|
261
|
+
name: 'Jayde Online, Inc.'
|
262
|
+
url: 'http://www.jaydeonlineinc.com'
|
263
|
+
|
264
|
+
- regex: 'Ezooms'
|
265
|
+
name: 'Ezooms'
|
266
|
+
category: 'Crawler'
|
267
|
+
url: ''
|
268
|
+
producer:
|
269
|
+
name: 'SEOmoz, Inc.'
|
270
|
+
url: 'http://moz.com/'
|
271
|
+
|
272
|
+
- regex: 'facebookexternalhit|facebookplatform'
|
273
|
+
name: 'Facebook External Hit'
|
274
|
+
category: 'Social Media Agent'
|
275
|
+
url: 'https://www.facebook.com/externalhit_uatext.php'
|
276
|
+
producer:
|
277
|
+
name: 'Facebook'
|
278
|
+
url: 'http://www.facebook.com'
|
279
|
+
|
280
|
+
- regex: 'Feedbin'
|
281
|
+
name: 'Feedbin'
|
282
|
+
url: 'http://feedbin.com/'
|
283
|
+
category: 'Feed Fetcher'
|
284
|
+
producer:
|
285
|
+
name: ''
|
286
|
+
url: ''
|
287
|
+
|
288
|
+
- regex: 'FeedBurner'
|
289
|
+
name: 'FeedBurner'
|
290
|
+
url: 'http://www.feedburner.com'
|
291
|
+
category: 'Feed Fetcher'
|
292
|
+
producer:
|
293
|
+
name: ''
|
294
|
+
url: ''
|
295
|
+
|
296
|
+
- regex: '(Meta)?Feedly(Bot|App)?'
|
297
|
+
name: 'Feedly'
|
298
|
+
url: 'http://www.feedly.com'
|
299
|
+
category: 'Feed Fetcher'
|
300
|
+
producer:
|
301
|
+
name: ''
|
302
|
+
url: ''
|
303
|
+
|
304
|
+
- regex: 'Feedspot'
|
305
|
+
name: 'Feedspot'
|
306
|
+
url: 'http://www.feedspot.com'
|
307
|
+
category: 'Feed Fetcher'
|
308
|
+
producer:
|
309
|
+
name: ''
|
310
|
+
url: ''
|
311
|
+
|
312
|
+
- regex: 'Fever'
|
313
|
+
name: 'Fever'
|
314
|
+
url: 'http://feedafever.com/'
|
315
|
+
category: 'Feed Fetcher'
|
316
|
+
producer:
|
317
|
+
name: ''
|
318
|
+
url: ''
|
319
|
+
|
320
|
+
- regex: 'Genieo'
|
321
|
+
name: 'Genieo Web filter'
|
322
|
+
category: ''
|
323
|
+
url: 'http://www.genieo.com/webfilter.html'
|
324
|
+
producer:
|
325
|
+
name: 'Genieo'
|
326
|
+
url: 'http://www.genieo.com'
|
327
|
+
|
328
|
+
- regex: 'ichiro/mobile goo'
|
329
|
+
name: 'Goo'
|
330
|
+
category: 'Search bot'
|
331
|
+
url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
|
332
|
+
producer:
|
333
|
+
name: 'NTT Resonant'
|
334
|
+
url: 'http://goo.ne.jp'
|
335
|
+
|
336
|
+
- regex: 'Google Page Speed Insights'
|
337
|
+
name: 'Google PageSpeed Insights'
|
338
|
+
category: 'Site Monitor'
|
339
|
+
url: 'http://developers.google.com/speed/pagespeed/insights/'
|
340
|
+
producer:
|
341
|
+
name: 'Google Inc.'
|
342
|
+
url: 'http://www.google.com'
|
343
|
+
|
344
|
+
- regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer'
|
345
|
+
name: 'Googlebot'
|
346
|
+
category: 'Search bot'
|
347
|
+
url: 'http://www.google.com/bot.html'
|
348
|
+
producer:
|
349
|
+
name: 'Google Inc.'
|
350
|
+
url: 'http://www.google.com'
|
351
|
+
|
352
|
+
- regex: 'heritrix'
|
353
|
+
name: 'Heritrix'
|
354
|
+
category: 'Crawler'
|
355
|
+
url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
|
356
|
+
producer:
|
357
|
+
name: 'The Internet Archive'
|
358
|
+
url: 'http://www.archive.org'
|
359
|
+
|
360
|
+
- regex: 'HTTPMon'
|
361
|
+
name: 'HTTPMon'
|
362
|
+
category: 'Site Monitor'
|
363
|
+
url: 'http://www.httpmon.com'
|
364
|
+
producer:
|
365
|
+
name: 'towards GmbH'
|
366
|
+
url: 'http://www.towards.ch/'
|
367
|
+
|
368
|
+
- regex: 'iisbot'
|
369
|
+
name: 'IIS Site Analysis'
|
370
|
+
category: 'crawler'
|
371
|
+
url: 'http://www.iis.net/iisbot.html'
|
372
|
+
producer:
|
373
|
+
name: 'Microsoft Corporation'
|
374
|
+
url: 'http://www.microsoft.com'
|
375
|
+
|
376
|
+
- regex: 'kouio'
|
377
|
+
name: 'Kouio'
|
378
|
+
url: 'http://kouio.com/'
|
379
|
+
category: 'Feed Fetcher'
|
380
|
+
producer:
|
381
|
+
name: ''
|
382
|
+
url: ''
|
383
|
+
|
384
|
+
- regex: 'linkdexbot(-mobile)?|linkdex.com'
|
385
|
+
name: 'Linkdex Bot'
|
386
|
+
category: 'Search bot'
|
387
|
+
url: 'http://www.linkdex.com/bots'
|
388
|
+
producer:
|
389
|
+
name: 'Mojeek Ltd.'
|
390
|
+
url: 'http://www.mojeek.com'
|
391
|
+
|
392
|
+
- regex: 'LinkedInBot'
|
393
|
+
name: 'LinkedIn Bot'
|
394
|
+
category: 'Social Media Agent'
|
395
|
+
url: 'http://www.linkedin.com'
|
396
|
+
producer:
|
397
|
+
name: 'LinkedIn'
|
398
|
+
url: 'http://www.linkedin.com'
|
399
|
+
|
400
|
+
- regex: 'Mail.RU(_Bot)?'
|
401
|
+
name: 'Mail.Ru Bot'
|
402
|
+
category: 'Search bot'
|
403
|
+
url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
|
404
|
+
producer:
|
405
|
+
name: 'Mail.Ru Group'
|
406
|
+
url: 'http://corp.mail.ru'
|
407
|
+
|
408
|
+
- regex: 'magpie-crawler'
|
409
|
+
name: 'Magpie-Crawler'
|
410
|
+
category: 'Social Media Agent'
|
411
|
+
url: 'http://www.brandwatch.com/magpie-crawler/'
|
412
|
+
producer:
|
413
|
+
name: 'Brandwatch'
|
414
|
+
url: 'http://www.brandwatch.com'
|
415
|
+
|
416
|
+
- regex: 'MagpieRSS'
|
417
|
+
name: 'MagpieRSS'
|
418
|
+
url: 'http://magpierss.sourceforge.net/'
|
419
|
+
category: 'Feed Parser'
|
420
|
+
producer:
|
421
|
+
name: ''
|
422
|
+
url: ''
|
423
|
+
|
424
|
+
- regex: 'meanpathbot'
|
425
|
+
name: 'Meanpath Bot'
|
426
|
+
category: 'Search bot'
|
427
|
+
url: 'http://www.meanpath.com/meanpathbot.html'
|
428
|
+
producer:
|
429
|
+
name: 'Meanpath'
|
430
|
+
url: 'http://www.meanpath.com'
|
431
|
+
|
432
|
+
- regex: 'MixrankBot'
|
433
|
+
name: 'Mixrank Bot'
|
434
|
+
category: 'Crawler'
|
435
|
+
url: 'http://mixrank.com'
|
436
|
+
producer:
|
437
|
+
name: 'Online Media Group, Inc.'
|
438
|
+
url: ''
|
439
|
+
|
440
|
+
- regex: 'MJ12bot'
|
441
|
+
name: 'MJ12 Bot'
|
442
|
+
category: 'Search bot'
|
443
|
+
url: 'http://majestic12.co.uk/bot.php'
|
444
|
+
producer:
|
445
|
+
name: 'Majestic-12'
|
446
|
+
url: 'http://majestic12.co.uk'
|
447
|
+
|
448
|
+
- regex: 'MojeekBot'
|
449
|
+
name: 'MojeekBot'
|
450
|
+
category: 'Search bot'
|
451
|
+
url: 'http://www.mojeek.com/bot.html'
|
452
|
+
producer:
|
453
|
+
name: 'Mojeek Ltd.'
|
454
|
+
url: 'http://www.mojeek.com'
|
455
|
+
|
456
|
+
- regex: 'NalezenCzBot'
|
457
|
+
name: 'NalezenCzBot'
|
458
|
+
category: 'Crawler'
|
459
|
+
url: 'http://www.nalezen.cz/about-crawler'
|
460
|
+
producer:
|
461
|
+
name: 'Jaroslav Kuboš'
|
462
|
+
url: ''
|
463
|
+
|
464
|
+
- regex: 'Netcraft Web Server Survey'
|
465
|
+
name: 'Netcraft Survey Bot'
|
466
|
+
category: 'Search bot'
|
467
|
+
url: ''
|
468
|
+
producer:
|
469
|
+
name: 'Netcraft'
|
470
|
+
url: 'http://www.netcraft.com'
|
471
|
+
|
472
|
+
- regex: 'Netvibes'
|
473
|
+
name: 'Netvibes'
|
474
|
+
url: 'http://www.netvibes.com/'
|
475
|
+
category: 'Feed Fetcher'
|
476
|
+
producer:
|
477
|
+
name: ''
|
478
|
+
url: ''
|
479
|
+
|
480
|
+
- regex: 'NewsBlur .*(Fetcher|Finder)'
|
481
|
+
name: 'NewsBlur'
|
482
|
+
url: 'http://www.newsblur.com'
|
483
|
+
category: 'Feed Fetcher'
|
484
|
+
producer:
|
485
|
+
name: ''
|
486
|
+
url: ''
|
487
|
+
|
488
|
+
- regex: 'NewsGatorOnline'
|
489
|
+
name: 'NewsGator'
|
490
|
+
url: 'http://www.newsgator.com'
|
491
|
+
category: 'Feed Fetcher'
|
492
|
+
producer:
|
493
|
+
name: ''
|
494
|
+
url: ''
|
495
|
+
|
496
|
+
- regex: 'nlcrawler'
|
497
|
+
name: 'NLCrawler'
|
498
|
+
category: 'Crawler'
|
499
|
+
url: ''
|
500
|
+
producer:
|
501
|
+
name: 'Northern Light'
|
502
|
+
url: 'http://northernlight.com'
|
503
|
+
|
504
|
+
- regex: 'omgilibot'
|
505
|
+
name: 'Omgili bot'
|
506
|
+
category: 'Search bot'
|
507
|
+
url: 'http://www.omgili.com/Crawler.html'
|
508
|
+
producer:
|
509
|
+
name: 'Omgili'
|
510
|
+
url: 'http://www.omgili.com'
|
511
|
+
|
512
|
+
- regex: 'OpenindexSpider'
|
513
|
+
name: 'Openindex Spider'
|
514
|
+
category: 'Search bot'
|
515
|
+
url: 'http://www.openindex.io/en/webmasters/spider.html'
|
516
|
+
producer:
|
517
|
+
name: 'Openindex B.V.'
|
518
|
+
url: 'http://www.openindex.io'
|
519
|
+
|
520
|
+
- regex: 'spbot'
|
521
|
+
name: 'OpenLinkProfiler'
|
522
|
+
category: 'Crawler'
|
523
|
+
url: 'http://openlinkprofiler.org/bot'
|
524
|
+
producer:
|
525
|
+
name: 'Axandra GmbH'
|
526
|
+
url: 'http://www.axandra.com'
|
527
|
+
|
528
|
+
- regex: 'OpenWebSpider'
|
529
|
+
name: 'OpenWebSpider'
|
530
|
+
category: 'Crawler'
|
531
|
+
url: 'http://www.openwebspider.org'
|
532
|
+
producer:
|
533
|
+
name: 'OpenWebSpider Lab'
|
534
|
+
url: 'http://lab.openwebspider.org'
|
535
|
+
|
536
|
+
- regex: 'PaperLiBot'
|
537
|
+
name: 'PaperLiBot'
|
538
|
+
category: 'Search bot'
|
539
|
+
url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
|
540
|
+
producer:
|
541
|
+
name: 'Smallrivers SA'
|
542
|
+
url: 'http://www.paper.li'
|
543
|
+
|
544
|
+
- regex: 'psbot(-page)?'
|
545
|
+
name: 'Picsearch bot'
|
546
|
+
category: 'Search bot'
|
547
|
+
url: 'http://www.picsearch.com/bot.html'
|
548
|
+
producer:
|
549
|
+
name: 'Picsearch'
|
550
|
+
url: 'http://www.picsearch.com'
|
551
|
+
|
552
|
+
- regex: 'Pingdom.com'
|
553
|
+
name: 'Pingdom Bot'
|
554
|
+
category: 'Site Monitors'
|
555
|
+
url: ''
|
556
|
+
producer:
|
557
|
+
name: 'Pingdom AB'
|
558
|
+
url: 'https://www.pingdom.com'
|
559
|
+
|
560
|
+
- regex: 'QuerySeekerSpider'
|
561
|
+
name: 'QuerySeekerSpider'
|
562
|
+
category: 'Crawler'
|
563
|
+
url: 'http://queryseeker.com/bot.html'
|
564
|
+
producer:
|
565
|
+
name: 'QueryEye Inc.'
|
566
|
+
url: 'http://queryeye.com'
|
567
|
+
|
568
|
+
- regex: 'redditbot'
|
569
|
+
name: 'Reddit Bot'
|
570
|
+
category: 'Social Media Agent'
|
571
|
+
url: 'http://www.reddit.com/feedback'
|
572
|
+
producer:
|
573
|
+
name: 'reddit inc.'
|
574
|
+
url: 'http://www.reddit.com'
|
575
|
+
|
576
|
+
- regex: 'rogerbot'
|
577
|
+
name: 'Rogerbot'
|
578
|
+
category: 'Crawler'
|
579
|
+
url: 'http://moz.com/help/pro/what-is-rogerbot-'
|
580
|
+
producer:
|
581
|
+
name: 'SEOmoz, Inc.'
|
582
|
+
url: 'http://moz.com/'
|
583
|
+
|
584
|
+
- regex: 'Screaming Frog SEO Spider'
|
585
|
+
name: 'Screaming Frog SEO Spider'
|
586
|
+
category: 'Crawler'
|
587
|
+
url: 'http://www.screamingfrog.co.uk/seo-spider'
|
588
|
+
producer:
|
589
|
+
name: 'Screaming Frog Ltd'
|
590
|
+
url: 'http://www.screamingfrog.co.uk'
|
591
|
+
|
592
|
+
- regex: 'ScreenerBot'
|
593
|
+
name: 'ScreenerBot'
|
594
|
+
category: 'Crawler'
|
595
|
+
url: 'http://www.screenerbot.com'
|
596
|
+
producer:
|
597
|
+
name: ''
|
598
|
+
url: ''
|
599
|
+
|
600
|
+
- regex: 'SemrushBot'
|
601
|
+
name: 'Semrush Bot'
|
602
|
+
category: 'Crawler'
|
603
|
+
url: 'http://www.semrush.com/bot.html'
|
604
|
+
producer:
|
605
|
+
name: 'SEMrush'
|
606
|
+
url: 'http://www.semrush.com'
|
607
|
+
|
608
|
+
- regex: 'SensikaBot'
|
609
|
+
name: 'Sensika Bot'
|
610
|
+
category: ''
|
611
|
+
url: ''
|
612
|
+
producer:
|
613
|
+
name: 'Sensika'
|
614
|
+
url: 'http://sensika.com'
|
615
|
+
|
616
|
+
- regex: 'SEOENG(World)?Bot'
|
617
|
+
name: 'SEOENGBot'
|
618
|
+
category: 'Crawler'
|
619
|
+
url: 'http://www.seoengine.com/seoengbot.htm'
|
620
|
+
producer:
|
621
|
+
name: 'SEO Engine'
|
622
|
+
url: 'http://www.seoengine.com'
|
623
|
+
|
624
|
+
- regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
|
625
|
+
name: 'Seznam Bot'
|
626
|
+
category: 'Search bot'
|
627
|
+
url: 'http://www.mapy.cz/cz/seznambot.html'
|
628
|
+
producer:
|
629
|
+
name: 'Seznam.cz, a.s.'
|
630
|
+
url: 'http://www.seznam.cz/'
|
631
|
+
|
632
|
+
- regex: 'ShopWiki'
|
633
|
+
name: 'ShopWiki'
|
634
|
+
category: 'Search tools'
|
635
|
+
url: 'http://www.shopwiki.com/wiki/Help:Bot'
|
636
|
+
producer:
|
637
|
+
name: 'ShopWiki Corp.'
|
638
|
+
url: 'http://www.shopwiki.com'
|
639
|
+
|
640
|
+
- regex: 'SilverReader'
|
641
|
+
name: 'SilverReader'
|
642
|
+
url: 'http://silverreader.com'
|
643
|
+
category: 'Feed Fetcher'
|
644
|
+
producer:
|
645
|
+
name: ''
|
646
|
+
url: ''
|
647
|
+
|
648
|
+
- regex: 'SimplePie'
|
649
|
+
name: 'SimplePie'
|
650
|
+
url: 'http://www.simplepie.org'
|
651
|
+
category: 'Feed Parser'
|
652
|
+
producer:
|
653
|
+
name: ''
|
654
|
+
url: ''
|
655
|
+
|
656
|
+
- regex: 'SISTRIX Crawler'
|
657
|
+
name: 'SISTRIX Crawler'
|
658
|
+
category: 'Crawler'
|
659
|
+
url: 'http://crawler.sistrix.net'
|
660
|
+
producer:
|
661
|
+
name: 'SISTRIX GmbH'
|
662
|
+
url: 'http://www.sistrix.de'
|
663
|
+
|
664
|
+
- regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
|
665
|
+
name: 'Sogou Spider'
|
666
|
+
category: 'Search bot'
|
667
|
+
url: 'http://www.sogou.com/docs/help/webmasters.htm'
|
668
|
+
producer:
|
669
|
+
name: 'Sohu, Inc.'
|
670
|
+
url: 'http://www.sogou.com'
|
671
|
+
|
672
|
+
- regex: 'Sosospider|Sosoimagespider'
|
673
|
+
name: 'Soso Spider'
|
674
|
+
category: 'Search bot'
|
675
|
+
url: 'http://help.soso.com/webspider.htm'
|
676
|
+
producer:
|
677
|
+
name: 'Tencent Holdings'
|
678
|
+
url: 'http://www.soso.com'
|
679
|
+
|
680
|
+
- regex: 'Superfeedr bot'
|
681
|
+
name: 'Superfeedr Bot'
|
682
|
+
category: 'Feed Fetcher'
|
683
|
+
url: ''
|
684
|
+
producer:
|
685
|
+
name: 'Superfeedr'
|
686
|
+
url: 'https://superfeedr.com/'
|
687
|
+
|
688
|
+
- regex: 'Spinn3r'
|
689
|
+
name: 'Spinn3r'
|
690
|
+
category: 'Crawler'
|
691
|
+
url: 'http://spinn3r.com/robot'
|
692
|
+
producer:
|
693
|
+
name: 'Tailrank Inc'
|
694
|
+
url: 'http://spinn3r.com'
|
695
|
+
|
696
|
+
- regex: 'Sputnik(Image)?Bot'
|
697
|
+
name: 'Sputnik Bot'
|
698
|
+
category: ''
|
699
|
+
url: ''
|
700
|
+
producer:
|
701
|
+
name: ''
|
702
|
+
url: ''
|
703
|
+
|
704
|
+
- regex: 'SurveyBot'
|
705
|
+
name: 'Survey Bot'
|
706
|
+
category: 'Search bot'
|
707
|
+
url: 'http://www.domaintools.com/webmasters/surveybot.php'
|
708
|
+
producer:
|
709
|
+
name: 'Domain Tools'
|
710
|
+
url: 'http://www.domaintools.com'
|
711
|
+
|
712
|
+
- regex: 'TinEye-bot'
|
713
|
+
name: 'TinEye Crawler'
|
714
|
+
category: 'Search bot'
|
715
|
+
url: 'http://www.tineye.com/crawler.html'
|
716
|
+
producer:
|
717
|
+
name: 'Idée Inc.'
|
718
|
+
url: 'http://ideeinc.com'
|
719
|
+
|
720
|
+
- regex: 'Tiny Tiny RSS'
|
721
|
+
name: 'Tiny Tiny RSS'
|
722
|
+
url: 'http://tt-rss.org'
|
723
|
+
category: 'Feed Fetcher'
|
724
|
+
producer:
|
725
|
+
name: ''
|
726
|
+
url: ''
|
727
|
+
|
728
|
+
- regex: 'TurnitinBot'
|
729
|
+
name: 'TurnitinBot'
|
730
|
+
category: 'Crawler'
|
731
|
+
url: 'http://www.turnitin.com/robot/crawlerinfo.html'
|
732
|
+
producer:
|
733
|
+
name: 'iParadigms, LLC.'
|
734
|
+
url: 'http://www.turnitin.com'
|
735
|
+
|
736
|
+
- regex: 'TweetedTimes Bot'
|
737
|
+
name: 'TweetedTimes Bot'
|
738
|
+
category: 'Crawler'
|
739
|
+
url: 'http://tweetedtimes.com'
|
740
|
+
producer:
|
741
|
+
name: 'TweetedTimes'
|
742
|
+
url: 'http://tweetedtimes.com/'
|
743
|
+
|
744
|
+
- regex: 'TweetmemeBot'
|
745
|
+
name: 'Tweetmeme Bot'
|
746
|
+
category: 'Crawler'
|
747
|
+
url: 'http://tweetmeme.com/'
|
748
|
+
producer:
|
749
|
+
name: 'Mediasift'
|
750
|
+
url: ''
|
751
|
+
|
752
|
+
- regex: 'Twitterbot'
|
753
|
+
name: 'Twitterbot'
|
754
|
+
category: 'Social Media Agent'
|
755
|
+
url: 'https://dev.twitter.com/docs/cards/getting-started'
|
756
|
+
producer:
|
757
|
+
name: 'Twitter'
|
758
|
+
url: 'http://www.twitter.com'
|
759
|
+
|
760
|
+
- regex: 'UptimeRobot'
|
761
|
+
name: 'Uptime Robot'
|
762
|
+
category: 'Site Monitor'
|
763
|
+
url: ''
|
764
|
+
producer:
|
765
|
+
name: 'Uptime Robot'
|
766
|
+
url: 'http://uptimerobot.com'
|
767
|
+
|
768
|
+
- regex: 'URLAppendBot'
|
769
|
+
name: 'URLAppendBot'
|
770
|
+
category: 'Crawler'
|
771
|
+
url: 'http://www.profound.net/urlappendbot.html'
|
772
|
+
producer:
|
773
|
+
name: 'Profound Networks'
|
774
|
+
url: 'http://www.profound.net'
|
775
|
+
|
776
|
+
- regex: 'VSMCrawler'
|
777
|
+
name: 'Visual Site Mapper Crawler'
|
778
|
+
category: 'Crawler'
|
779
|
+
url: 'http://www.visualsitemapper.com/crawler'
|
780
|
+
producer:
|
781
|
+
name: 'Alentum Software Ltd.'
|
782
|
+
url: 'http://www.alentum.com'
|
783
|
+
|
784
|
+
- regex: 'VoilaBot'
|
785
|
+
name: 'Voila Bot'
|
786
|
+
category: 'Search bot'
|
787
|
+
url: 'http://www.voila.fr'
|
788
|
+
producer:
|
789
|
+
name: ''
|
790
|
+
url: ''
|
791
|
+
|
792
|
+
- regex: 'Jigsaw'
|
793
|
+
name: 'W3C CSS Validator'
|
794
|
+
category: 'Validator'
|
795
|
+
url: 'http://jigsaw.w3.org/css-validator'
|
796
|
+
producer:
|
797
|
+
name: 'W3C'
|
798
|
+
url: 'http://www.w3.org'
|
799
|
+
|
800
|
+
- regex: 'W3C_I18n-Checker'
|
801
|
+
name: 'W3C I18N Checker'
|
802
|
+
category: 'Validator'
|
803
|
+
url: 'http://validator.w3.org/i18n-checker'
|
804
|
+
producer:
|
805
|
+
name: 'W3C'
|
806
|
+
url: 'http://www.w3.org'
|
807
|
+
|
808
|
+
- regex: 'W3C-checklink'
|
809
|
+
name: 'W3C Link Checker'
|
810
|
+
category: 'Validator'
|
811
|
+
url: 'http://validator.w3.org/checklink'
|
812
|
+
producer:
|
813
|
+
name: 'W3C'
|
814
|
+
url: 'http://www.w3.org'
|
815
|
+
|
816
|
+
- regex: 'W3C_Validator'
|
817
|
+
name: 'W3C Markup Validation Service'
|
818
|
+
category: 'Validator'
|
819
|
+
url: 'http://validator.w3.org/services'
|
820
|
+
producer:
|
821
|
+
name: 'W3C'
|
822
|
+
url: 'http://www.w3.org'
|
823
|
+
|
824
|
+
- regex: 'W3C-mobileOK'
|
825
|
+
name: 'W3C MobileOK Checker'
|
826
|
+
category: 'Validator'
|
827
|
+
url: 'http://validator.w3.org/mobile'
|
828
|
+
producer:
|
829
|
+
name: 'W3C'
|
830
|
+
url: 'http://www.w3.org'
|
831
|
+
|
832
|
+
- regex: 'W3C_Unicorn'
|
833
|
+
name: 'W3C Unified Validator'
|
834
|
+
category: 'Validator'
|
835
|
+
url: 'http://validator.w3.org/unicorn'
|
836
|
+
producer:
|
837
|
+
name: 'W3C'
|
838
|
+
url: 'http://www.w3.org'
|
839
|
+
|
840
|
+
- regex: 'WeSEE(:Search)?'
|
841
|
+
name: 'WeSEE:Search'
|
842
|
+
category: 'Search bot'
|
843
|
+
url: 'http://www.wesee.com/bot'
|
844
|
+
producer:
|
845
|
+
name: 'WeSEE Ltd'
|
846
|
+
url: 'http://www.wesee.com'
|
847
|
+
|
848
|
+
- regex: 'WebbCrawler'
|
849
|
+
name: 'WebbCrawler'
|
850
|
+
category: 'Crawler'
|
851
|
+
url: 'http://badcheese.com/crawler.html'
|
852
|
+
producer:
|
853
|
+
name: 'Steve Webb'
|
854
|
+
url: 'http://badcheese.com'
|
855
|
+
|
856
|
+
- regex: 'Wotbox'
|
857
|
+
name: 'Wotbox'
|
858
|
+
category: 'Search bot'
|
859
|
+
url: 'http://www.wotbox.com/bot/'
|
860
|
+
producer:
|
861
|
+
name: 'Wotbox'
|
862
|
+
url: 'http://www.wotbox.com'
|
863
|
+
|
864
|
+
- regex: 'yacybot'
|
865
|
+
name: 'YaCy'
|
866
|
+
category: 'Search bot'
|
867
|
+
url: 'http://yacy.net/bot.html'
|
868
|
+
producer:
|
869
|
+
name: 'YaCy'
|
870
|
+
url: 'http://yacy.net'
|
871
|
+
|
872
|
+
- regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
|
873
|
+
name: 'Yahoo! Slurp'
|
874
|
+
category: 'Search bot'
|
875
|
+
url: 'http://help.yahoo.com/ysearch/slurp'
|
876
|
+
producer:
|
877
|
+
name: 'Yahoo! Inc.'
|
878
|
+
url: 'http://www.yahoo.com'
|
879
|
+
|
880
|
+
- regex: 'Yandex(Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|.Gazeta Bot)'
|
881
|
+
name: 'Yandex Bot'
|
882
|
+
category: 'Search bot'
|
883
|
+
url: 'http://www.yandex.com/bots'
|
884
|
+
producer:
|
885
|
+
name: 'Yandex LLC'
|
886
|
+
url: 'http://company.yandex.com'
|
887
|
+
|
888
|
+
- regex: 'Yeti'
|
889
|
+
name: 'Yeti/Naverbot'
|
890
|
+
category: 'Search bot'
|
891
|
+
url: 'http://help.naver.com/robots/'
|
892
|
+
producer:
|
893
|
+
name: 'Naver'
|
894
|
+
url: 'http://www.naver.com'
|
895
|
+
|
896
|
+
- regex: 'YoudaoBot'
|
897
|
+
name: 'Youdao Bot'
|
898
|
+
category: 'Search bot'
|
899
|
+
url: 'http://www.youdao.com/help/webmaster/spider'
|
900
|
+
producer:
|
901
|
+
name: 'NetEase, Inc.'
|
902
|
+
url: 'http://corp.163.com'
|
903
|
+
|
904
|
+
- regex: 'YRSpider|YYSpider'
|
905
|
+
name: 'Yunyun Bot'
|
906
|
+
category: 'Search bot'
|
907
|
+
url: 'http://www.yunyun.com/SiteInfo.php?r=about'
|
908
|
+
producer:
|
909
|
+
name: 'YunYun'
|
910
|
+
url: 'http://www.yunyun.com'
|
911
|
+
|
912
|
+
- regex: 'Zookabot'
|
913
|
+
name: 'Zookabot'
|
914
|
+
category: 'Crawler'
|
915
|
+
url: 'http://zookabot.com'
|
916
|
+
producer:
|
917
|
+
name: 'Hwacha ApS'
|
918
|
+
url: 'http://hwacha.dk'
|
919
|
+
|
920
|
+
- regex: 'ZumBot'
|
921
|
+
name: 'ZumBot'
|
922
|
+
category: 'Search bot'
|
923
|
+
url: 'http://help.zum.com/inquiry'
|
924
|
+
producer:
|
925
|
+
name: 'ZUM internet'
|
926
|
+
url: 'http://www.zuminternet.com/'
|
927
|
+
|
928
|
+
- regex: 'YottaaMonitor'
|
929
|
+
name: 'Yottaa Site Monitor'
|
930
|
+
category: 'Site Monitor'
|
931
|
+
url: 'http://www.yottaa.com/products/site-monitor'
|
932
|
+
producer:
|
933
|
+
name: 'Yottaa'
|
934
|
+
url: 'http://www.yottaa.com/'
|
935
|
+
|
936
|
+
|
937
|
+
|
938
|
+
- regex: 'lycos'
|
939
|
+
name: 'Lycos'
|
940
|
+
|
941
|
+
- regex: 'Slurp'
|
942
|
+
name: 'Inktomi Slurp'
|
943
|
+
|
944
|
+
- regex: 'Speedy Spider'
|
945
|
+
name: 'Speedy'
|
946
|
+
|
947
|
+
- regex: 'ScoutJet'
|
948
|
+
name: 'ScoutJet'
|
949
|
+
|
950
|
+
- regex: 'nrsbot|netresearch'
|
951
|
+
name: 'NetResearchServer'
|
952
|
+
|
953
|
+
- regex: 'scooter'
|
954
|
+
name: 'Scooter'
|
955
|
+
|
956
|
+
- regex: 'gigabot'
|
957
|
+
name: 'Gigabot'
|
958
|
+
|
959
|
+
- regex: 'charlotte'
|
960
|
+
name: 'Charlotte'
|
961
|
+
|
962
|
+
- regex: 'Pompos'
|
963
|
+
name: 'Pompos'
|
964
|
+
|
965
|
+
- regex: 'ichiro'
|
966
|
+
name: 'ichiro'
|
967
|
+
|
968
|
+
- regex: 'PagePeeker'
|
969
|
+
name: 'PagePeeker'
|
970
|
+
|
971
|
+
- regex: 'WebThumbnail'
|
972
|
+
name: 'WebThumbnail'
|
973
|
+
|
974
|
+
- regex: 'Willow Internet Crawler'
|
975
|
+
name: 'Willow Internet Crawler'
|
976
|
+
|
977
|
+
- regex: 'EmailWolf'
|
978
|
+
name: 'EmailWolf'
|
979
|
+
|
980
|
+
|
981
|
+
- regex: '(nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex|zao|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
|
982
|
+
name: 'Bot'
|
983
|
+
|
984
|
+
# Generic detections
|
985
|
+
|
986
|
+
- regex: 'Nutch'
|
987
|
+
name: 'Nutch-based Bot'
|
988
|
+
category: 'crawler'
|
989
|
+
url: 'https://nutch.apache.org'
|
990
|
+
producer:
|
991
|
+
name: 'The Apache Software Foundation'
|
992
|
+
url: 'http://www.apache.org/foundation/'
|
993
|
+
|
994
|
+
# Original:
|
995
|
+
# - regex: '[a-z0-9-_]*(bot|crawler|archiver|transcoder|spider)'
|
996
|
+
# name: 'Generic Bot'
|
997
|
+
#
|
998
|
+
# Adapted for Ruby:
|
999
|
+
# Note the backslash added in [a-z0-9\-_]; it is necessary to avoid
|
1000
|
+
# warnings from the interpreter
|
1001
|
+
- regex: '[a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider)'
|
1002
|
+
name: 'Generic Bot'
|
@@ -48,6 +48,22 @@ RSpec.describe DeviceDetector do
|
|
48
48
|
|
49
49
|
end
|
50
50
|
|
51
|
+
describe '#bot?' do
|
52
|
+
|
53
|
+
it 'returns false' do
|
54
|
+
expect(client.bot?).to eq(false)
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
describe '#bot_name' do
|
60
|
+
|
61
|
+
it 'returns nil' do
|
62
|
+
expect(client.bot_name).to be_nil
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
51
67
|
end
|
52
68
|
|
53
69
|
context 'unknown user agent' do
|
@@ -94,5 +110,83 @@ RSpec.describe DeviceDetector do
|
|
94
110
|
|
95
111
|
end
|
96
112
|
|
113
|
+
describe '#bot?' do
|
114
|
+
|
115
|
+
it 'returns false' do
|
116
|
+
expect(client.bot?).to eq(false)
|
117
|
+
end
|
118
|
+
|
119
|
+
end
|
120
|
+
|
121
|
+
describe '#bot_name' do
|
122
|
+
|
123
|
+
it 'returns nil' do
|
124
|
+
expect(client.bot_name).to be_nil
|
125
|
+
end
|
126
|
+
|
127
|
+
end
|
128
|
+
|
129
|
+
end
|
130
|
+
|
131
|
+
context 'bot' do
|
132
|
+
|
133
|
+
let(:user_agent) { 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' }
|
134
|
+
|
135
|
+
describe '#name' do
|
136
|
+
|
137
|
+
it 'returns nil' do
|
138
|
+
expect(client.name).to be_nil
|
139
|
+
end
|
140
|
+
|
141
|
+
end
|
142
|
+
|
143
|
+
describe '#full_version' do
|
144
|
+
|
145
|
+
it 'returns nil' do
|
146
|
+
expect(client.full_version).to be_nil
|
147
|
+
end
|
148
|
+
|
149
|
+
end
|
150
|
+
|
151
|
+
describe '#os_name' do
|
152
|
+
|
153
|
+
it 'returns nil' do
|
154
|
+
expect(client.os_name).to be_nil
|
155
|
+
end
|
156
|
+
|
157
|
+
end
|
158
|
+
|
159
|
+
describe '#os_full_version' do
|
160
|
+
|
161
|
+
it 'returns nil' do
|
162
|
+
expect(client.os_full_version).to be_nil
|
163
|
+
end
|
164
|
+
|
165
|
+
end
|
166
|
+
|
167
|
+
describe '#known?' do
|
168
|
+
|
169
|
+
it 'returns false' do
|
170
|
+
expect(client.known?).to eq(false)
|
171
|
+
end
|
172
|
+
|
173
|
+
end
|
174
|
+
|
175
|
+
describe '#bot?' do
|
176
|
+
|
177
|
+
it 'returns true' do
|
178
|
+
expect(client.bot?).to eq(true)
|
179
|
+
end
|
180
|
+
|
181
|
+
end
|
182
|
+
|
183
|
+
describe '#bot_name' do
|
184
|
+
|
185
|
+
it 'returns the name of the bot' do
|
186
|
+
expect(client.bot_name).to eq('Googlebot')
|
187
|
+
end
|
188
|
+
|
189
|
+
end
|
190
|
+
|
97
191
|
end
|
98
192
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: device_detector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mati Sójka
|
@@ -60,11 +60,13 @@ files:
|
|
60
60
|
- Rakefile
|
61
61
|
- device_detector.gemspec
|
62
62
|
- lib/device_detector.rb
|
63
|
+
- lib/device_detector/bot.rb
|
63
64
|
- lib/device_detector/client.rb
|
64
65
|
- lib/device_detector/os.rb
|
65
66
|
- lib/device_detector/parser.rb
|
66
67
|
- lib/device_detector/version.rb
|
67
68
|
- lib/device_detector/version_extractor.rb
|
69
|
+
- regexes/bots.yml
|
68
70
|
- regexes/browser_engines.yml
|
69
71
|
- regexes/browsers.yml
|
70
72
|
- regexes/feed_readers.yml
|