device_detector 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e3756592136cc647379275944c3e5dd622d3204e
4
- data.tar.gz: 49abbfad35f18ed4e585ff656369e113da6407e4
3
+ metadata.gz: 3b1d7d2d1f51f584026667fa8fff86b9d4cf994d
4
+ data.tar.gz: dc71e14d78d7432fd03ea583fbaaf039a1aa3242
5
5
  SHA512:
6
- metadata.gz: a296b5604453c8cbcbdc82882e8429900a015eb28567df7dfe7f9eedbf7c5d4f02fce372d11b285eae5c99b8e9680d5ce7d3a58a605cc6ebcf375336c80c5605
7
- data.tar.gz: 53bd317b7a28fdbfb3a1386b7989c8aaa77fc78062ba8071f1063520c03ed16aced508b9c2d7935acef8a69f28aac284cf6fd63d4a4489350db69c95e2fc2b74
6
+ metadata.gz: 956b2f1762edd6106dd96a8c920b88d0e1b323552790ba46d533063028802c4ca91f739a4c49d95a4ade4f273f1abad48a2a73781177ccdfd776857c45e087cc
7
+ data.tar.gz: 49a4bbc2a254452296f155e11837772afb3a4399308a18eb035c98ab0f7ead38ab850f0650b802043c4701afe077788ebae5a1ae33d52e4a2d2d6926281f6de6
data/README.md CHANGED
@@ -1,8 +1,20 @@
1
1
  # DeviceDetector
2
2
 
3
- This is a Ruby port of the Universal Device Detection library. You can find the original code here: [https://github.com/piwik/device-detector].
3
+ This is a Ruby port of the Universal Device Detection library.
4
+ You can find the original code here: <https://github.com/piwik/device-detector>.
4
5
 
5
- The Universal Device Detection library will parse any User Agent and detect the browser, operating system, device used (desktop, tablet, mobile, tv, cars, console, etc.), brand and model.
6
+ The Universal Device Detection library will parse any User Agent and detect
7
+ the browser, operating system, device used (desktop, tablet, mobile, tv, cars,
8
+ console, etc.), brand and model.
9
+
10
+ ## Disclaimer
11
+
12
+ This port does not aspire to be a one-to-one copy from the original code, but
13
+ rather an adaptation for the Ruby language.
14
+
15
+ Still, our goal is to use the original, unchanged regex yaml files, in order to
16
+ mutually benefit from updates and pull requests to both the original and the
17
+ ported versions.
6
18
 
7
19
  ## Installation
8
20
 
@@ -6,13 +6,12 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
6
  require 'device_detector/version'
7
7
  require 'device_detector/version_extractor'
8
8
  require 'device_detector/parser'
9
+ require 'device_detector/bot'
9
10
  require 'device_detector/client'
10
11
  require 'device_detector/os'
11
12
 
12
13
  class DeviceDetector
13
14
 
14
- ROOT = Pathname.new(File.expand_path('../..', __FILE__))
15
-
16
15
  attr_reader :user_agent
17
16
 
18
17
  def initialize(user_agent)
@@ -39,8 +38,20 @@ class DeviceDetector
39
38
  client.known?
40
39
  end
41
40
 
41
+ def bot?
42
+ bot.bot?
43
+ end
44
+
45
+ def bot_name
46
+ bot.name
47
+ end
48
+
42
49
  private
43
50
 
51
+ def bot
52
+ @bot ||= Bot.new(user_agent)
53
+ end
54
+
44
55
  def client
45
56
  @client ||= Client.new(user_agent)
46
57
  end
@@ -0,0 +1,15 @@
1
+ class DeviceDetector
2
+ class Bot < Parser
3
+
4
+ def bot?
5
+ regex_meta.any?
6
+ end
7
+
8
+ private
9
+
10
+ def filenames
11
+ ['bots.yml']
12
+ end
13
+
14
+ end
15
+ end
@@ -29,10 +29,13 @@ class DeviceDetector
29
29
 
30
30
  def filepaths
31
31
  filenames.map do |filename|
32
- File.join(DeviceDetector::ROOT, 'regexes', filename)
32
+ File.join(root, 'regexes', filename)
33
33
  end
34
34
  end
35
35
 
36
+ def root
37
+ Pathname.new(File.expand_path('../../..', __FILE__))
38
+ end
36
39
 
37
40
  end
38
41
  end
@@ -1,3 +1,3 @@
1
1
  class DeviceDetector
2
- VERSION = '0.1.0'
2
+ VERSION = '0.2.0'
3
3
  end
data/regexes/bots.yml ADDED
@@ -0,0 +1,1002 @@
1
+ ###############
2
+ # Device Detector - The Universal Device Detection library for parsing User Agents
3
+ #
4
+ # @link http://piwik.org
5
+ # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
+ ###############
7
+
8
+ - regex: '360Spider(-Image|-Video)?'
9
+ name: '360Spider'
10
+ category: 'Search bot'
11
+ url: 'http://www.so.com/help/help_3_2.html'
12
+ producer:
13
+ name: 'Online Media Group, Inc.'
14
+ url: ''
15
+
16
+ - regex: 'Aboundex'
17
+ name: 'Aboundexbot'
18
+ category: 'Search bot'
19
+ url: 'http://www.aboundex.com/crawler/'
20
+ producer:
21
+ name: 'Aboundex.com'
22
+ url: 'http://www.aboundex.com'
23
+
24
+ - regex: 'AcoonBot'
25
+ name: 'Acoon'
26
+ category: 'Search bot'
27
+ url: 'http://www.acoon.de/robot.asp'
28
+ producer:
29
+ name: 'Acoon GmbH'
30
+ url: 'http://www.acoon.de'
31
+
32
+ - regex: 'AddThis.com'
33
+ name: 'AddThis.com'
34
+ category: 'Social Media Agent'
35
+ url: ''
36
+ producer:
37
+ name: 'Clearspring Technologies, Inc.'
38
+ url: 'http://www.clearspring.com'
39
+
40
+ - regex: 'AhrefsBot'
41
+ name: 'aHrefs Bot'
42
+ category: 'Crawler'
43
+ url: 'http://ahrefs.com/robot'
44
+ producer:
45
+ name: 'Ahrefs Pte Ltd'
46
+ url: 'http://ahrefs.com/robot'
47
+
48
+ - regex: 'ia_archiver|alexabot|verifybot'
49
+ name: 'Alexa Crawler'
50
+ category: 'Search bot'
51
+ url: 'https://alexa.zendesk.com/hc/en-us/sections/200100794-Crawlers'
52
+ producer:
53
+ name: 'Alexa Internet'
54
+ url: 'http://www.alexa.com'
55
+
56
+ - regex: 'AmorankSpider'
57
+ name: 'Amorank Spider'
58
+ category: 'Crawler'
59
+ url: 'http://amorank.com/webcrawler.html'
60
+ producer:
61
+ name: 'Amorank'
62
+ url: 'http://www.amorank.com'
63
+
64
+ - regex: 'Curious George'
65
+ name: 'Analytics SEO Crawler'
66
+ category: 'Crawler'
67
+ url: 'http://www.analyticsseo.com/crawler'
68
+ producer:
69
+ name: 'Analytics SEO'
70
+ url: 'http://www.analyticsseo.com'
71
+
72
+ - regex: 'archive.org_bot|special_archiver'
73
+ name: 'archive.org bot'
74
+ category: 'Crawler'
75
+ url: 'http://www.archive.org/details/archive.org_bot'
76
+ producer:
77
+ name: 'The Internet Archive'
78
+ url: 'http://www.archive.org'
79
+
80
+ - regex: 'Ask Jeeves/Teoma'
81
+ name: 'Ask Jeeves'
82
+ category: 'Search bot'
83
+ url: ''
84
+ producer:
85
+ name: 'Ask Jeeves Inc.'
86
+ url: 'http://www.ask.com'
87
+
88
+ - regex: 'Backlink-Ceck.de'
89
+ name: 'Backlink-Ceck.de'
90
+ category: 'Crawler'
91
+ url: 'http://www.backlink-check.de/bot.html'
92
+ producer:
93
+ name: 'Mediagreen Medienservice'
94
+ url: 'http://www.backlink-check.de'
95
+
96
+ - regex: 'BacklinkCrawler'
97
+ name: 'BacklinkCrawler'
98
+ category: 'Crawler'
99
+ url: 'http://www.backlinktest.com/crawler.html'
100
+ producer:
101
+ name: '2.0Promotion GbR'
102
+ url: 'http://www.backlinktest.com'
103
+
104
+ - regex: 'baiduspider(-image)?|baidu Transcoder|baidu.*spider'
105
+ name: 'Baidu Spider'
106
+ category: 'Search bot'
107
+ url: 'http://www.baidu.com/search/spider.htm'
108
+ producer:
109
+ name: 'Baidu'
110
+ url: 'http://www.baidu.com'
111
+
112
+ - regex: 'MSNBot|msrbot|bingbot|BingPreview|msnbot-(UDiscovery|NewsBlogs)|adidxbot'
113
+ name: 'BingBot'
114
+ category: 'Search bot'
115
+ url: 'http://search.msn.com/msnbot.htmn'
116
+ producer:
117
+ name: 'Microsoft Corporation'
118
+ url: 'http://www.microsoft.com'
119
+
120
+ - regex: 'Blekkobot'
121
+ name: 'Blekkobot'
122
+ category: 'Search bot'
123
+ url: 'http://blekko.com/about/blekkobot'
124
+ producer:
125
+ name: 'Blekko'
126
+ url: 'http://blekko.com'
127
+
128
+ - regex: 'BLEXBot(Test)?'
129
+ name: 'BLEXBot Crawler'
130
+ category: 'Crawler'
131
+ url: 'http://webmeup-crawler.com'
132
+ producer:
133
+ name: 'WebMeUp'
134
+ url: 'http://webmeup.com'
135
+
136
+ - regex: 'Bloglovin'
137
+ name: 'Bloglovin'
138
+ url: 'http://www.bloglovin.com'
139
+ category: 'Feed Fetcher'
140
+ producer:
141
+ name: ''
142
+ url: ''
143
+
144
+ - regex: 'BountiiBot'
145
+ name: 'Bountii Bot'
146
+ category: 'Search bot'
147
+ url: 'http://bountii.com/contact.php'
148
+ producer:
149
+ name: 'Bountii Inc.'
150
+ url: 'http://bountii.com'
151
+
152
+ - regex: 'Browsershots'
153
+ name: 'Browsershots'
154
+ category: 'Service Agent'
155
+ url: 'http://browsershots.org/faq'
156
+ producer:
157
+ name: 'Browsershots.org'
158
+ url: 'http://browsershots.org'
159
+
160
+ - regex: '(?<!HTC)[ _]Butterfly'
161
+ name: 'Butterfly Robot'
162
+ category: 'Search bot'
163
+ url: 'http://labs.topsy.com/butterfly'
164
+ producer:
165
+ name: 'Topsy Labs'
166
+ url: 'http://labs.topsy.com'
167
+
168
+ - regex: 'CareerBot'
169
+ name: 'CareerBot'
170
+ category: 'Crawler'
171
+ url: 'http://www.career-x.de/bot.html'
172
+ producer:
173
+ name: 'career-x GmbH'
174
+ url: 'http://www.career-x.de'
175
+
176
+ - regex: 'CCBot'
177
+ name: 'ccBot crawler'
178
+ category: 'Crawler'
179
+ url: 'http://commoncrawl.org/faq/'
180
+ producer:
181
+ name: 'reddit inc.'
182
+ url: 'http://www.reddit.com'
183
+
184
+ - regex: 'Cliqzbot'
185
+ name: 'Cliqzbot'
186
+ category: 'Crawler'
187
+ url: 'http://cliqz.com/company/cliqzbot'
188
+ producer:
189
+ name: '10betterpages GmbH'
190
+ url: 'http://cliqz.com'
191
+
192
+ - regex: 'CloudFlare-AlwaysOnline'
193
+ name: 'CloudFlare Always Online'
194
+ category: 'Site Monitor'
195
+ url: 'http://www.cloudflare.com/always-online'
196
+ producer:
197
+ name: 'CloudFlare'
198
+ url: 'http://www.cloudflare.com'
199
+
200
+ - regex: 'CommaFeed'
201
+ name: 'CommaFeed'
202
+ url: 'http://www.commafeed.com'
203
+ category: 'Feed Fetcher'
204
+ producer:
205
+ name: ''
206
+ url: ''
207
+
208
+ - regex: 'Dazoobot'
209
+ name: 'Dazoobot'
210
+ category: 'Search bot'
211
+ url: ''
212
+ producer:
213
+ name: 'DAZOO.FR'
214
+ url: 'http://dazoo.fr'
215
+
216
+ - regex: 'discobot(-news)?'
217
+ name: 'Discobot'
218
+ category: 'Search bot'
219
+ url: 'http://discoveryengine.com/discobot.html'
220
+ producer:
221
+ name: 'Discovery Engine'
222
+ url: 'http://discoveryengine.com'
223
+
224
+ - regex: 'DotBot'
225
+ name: 'DotBot'
226
+ category: 'Crawler'
227
+ url: 'http://www.opensiteexplorer.org/dotbot'
228
+ producer:
229
+ name: 'SEOmoz, Inc.'
230
+ url: 'http://moz.com/'
231
+
232
+ - regex: 'EasouSpider'
233
+ name: 'Easou Spider'
234
+ category: 'Search bot'
235
+ url: 'http://www.easou.com/search/spider.html'
236
+ producer:
237
+ name: 'easou ICP'
238
+ url: 'http://www.easou.com'
239
+
240
+ - regex: 'EMail Exractor'
241
+ name: 'EMail Exractor'
242
+ category: 'Crawler'
243
+ url: ''
244
+ producer:
245
+ name: ''
246
+ url: ''
247
+
248
+ - regex: 'Exabot(-Thumbnails|-Images)?|ExaleadCloudview'
249
+ name: 'ExaBot'
250
+ category: 'Crawler'
251
+ url: 'http://www.exabot.com/go/robot'
252
+ producer:
253
+ name: 'Dassault Systèmes'
254
+ url: 'http://www.3ds.com'
255
+
256
+ - regex: 'ExactSeek Crawler'
257
+ name: 'ExactSeek Crawler'
258
+ category: 'Search bot'
259
+ url: 'http://www.exactseek.com'
260
+ producer:
261
+ name: 'Jayde Online, Inc.'
262
+ url: 'http://www.jaydeonlineinc.com'
263
+
264
+ - regex: 'Ezooms'
265
+ name: 'Ezooms'
266
+ category: 'Crawler'
267
+ url: ''
268
+ producer:
269
+ name: 'SEOmoz, Inc.'
270
+ url: 'http://moz.com/'
271
+
272
+ - regex: 'facebookexternalhit|facebookplatform'
273
+ name: 'Facebook External Hit'
274
+ category: 'Social Media Agent'
275
+ url: 'https://www.facebook.com/externalhit_uatext.php'
276
+ producer:
277
+ name: 'Facebook'
278
+ url: 'http://www.facebook.com'
279
+
280
+ - regex: 'Feedbin'
281
+ name: 'Feedbin'
282
+ url: 'http://feedbin.com/'
283
+ category: 'Feed Fetcher'
284
+ producer:
285
+ name: ''
286
+ url: ''
287
+
288
+ - regex: 'FeedBurner'
289
+ name: 'FeedBurner'
290
+ url: 'http://www.feedburner.com'
291
+ category: 'Feed Fetcher'
292
+ producer:
293
+ name: ''
294
+ url: ''
295
+
296
+ - regex: '(Meta)?Feedly(Bot|App)?'
297
+ name: 'Feedly'
298
+ url: 'http://www.feedly.com'
299
+ category: 'Feed Fetcher'
300
+ producer:
301
+ name: ''
302
+ url: ''
303
+
304
+ - regex: 'Feedspot'
305
+ name: 'Feedspot'
306
+ url: 'http://www.feedspot.com'
307
+ category: 'Feed Fetcher'
308
+ producer:
309
+ name: ''
310
+ url: ''
311
+
312
+ - regex: 'Fever'
313
+ name: 'Fever'
314
+ url: 'http://feedafever.com/'
315
+ category: 'Feed Fetcher'
316
+ producer:
317
+ name: ''
318
+ url: ''
319
+
320
+ - regex: 'Genieo'
321
+ name: 'Genieo Web filter'
322
+ category: ''
323
+ url: 'http://www.genieo.com/webfilter.html'
324
+ producer:
325
+ name: 'Genieo'
326
+ url: 'http://www.genieo.com'
327
+
328
+ - regex: 'ichiro/mobile goo'
329
+ name: 'Goo'
330
+ category: 'Search bot'
331
+ url: 'http://search.goo.ne.jp/option/use/sub4/sub4-1'
332
+ producer:
333
+ name: 'NTT Resonant'
334
+ url: 'http://goo.ne.jp'
335
+
336
+ - regex: 'Google Page Speed Insights'
337
+ name: 'Google PageSpeed Insights'
338
+ category: 'Site Monitor'
339
+ url: 'http://developers.google.com/speed/pagespeed/insights/'
340
+ producer:
341
+ name: 'Google Inc.'
342
+ url: 'http://www.google.com'
343
+
344
+ - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer'
345
+ name: 'Googlebot'
346
+ category: 'Search bot'
347
+ url: 'http://www.google.com/bot.html'
348
+ producer:
349
+ name: 'Google Inc.'
350
+ url: 'http://www.google.com'
351
+
352
+ - regex: 'heritrix'
353
+ name: 'Heritrix'
354
+ category: 'Crawler'
355
+ url: 'https://webarchive.jira.com/wiki/display/Heritrix/Heritrix'
356
+ producer:
357
+ name: 'The Internet Archive'
358
+ url: 'http://www.archive.org'
359
+
360
+ - regex: 'HTTPMon'
361
+ name: 'HTTPMon'
362
+ category: 'Site Monitor'
363
+ url: 'http://www.httpmon.com'
364
+ producer:
365
+ name: 'towards GmbH'
366
+ url: 'http://www.towards.ch/'
367
+
368
+ - regex: 'iisbot'
369
+ name: 'IIS Site Analysis'
370
+ category: 'crawler'
371
+ url: 'http://www.iis.net/iisbot.html'
372
+ producer:
373
+ name: 'Microsoft Corporation'
374
+ url: 'http://www.microsoft.com'
375
+
376
+ - regex: 'kouio'
377
+ name: 'Kouio'
378
+ url: 'http://kouio.com/'
379
+ category: 'Feed Fetcher'
380
+ producer:
381
+ name: ''
382
+ url: ''
383
+
384
+ - regex: 'linkdexbot(-mobile)?|linkdex.com'
385
+ name: 'Linkdex Bot'
386
+ category: 'Search bot'
387
+ url: 'http://www.linkdex.com/bots'
388
+ producer:
389
+ name: 'Mojeek Ltd.'
390
+ url: 'http://www.mojeek.com'
391
+
392
+ - regex: 'LinkedInBot'
393
+ name: 'LinkedIn Bot'
394
+ category: 'Social Media Agent'
395
+ url: 'http://www.linkedin.com'
396
+ producer:
397
+ name: 'LinkedIn'
398
+ url: 'http://www.linkedin.com'
399
+
400
+ - regex: 'Mail.RU(_Bot)?'
401
+ name: 'Mail.Ru Bot'
402
+ category: 'Search bot'
403
+ url: 'http://help.mail.ru/webmaster/indexing/robots/types_robots'
404
+ producer:
405
+ name: 'Mail.Ru Group'
406
+ url: 'http://corp.mail.ru'
407
+
408
+ - regex: 'magpie-crawler'
409
+ name: 'Magpie-Crawler'
410
+ category: 'Social Media Agent'
411
+ url: 'http://www.brandwatch.com/magpie-crawler/'
412
+ producer:
413
+ name: 'Brandwatch'
414
+ url: 'http://www.brandwatch.com'
415
+
416
+ - regex: 'MagpieRSS'
417
+ name: 'MagpieRSS'
418
+ url: 'http://magpierss.sourceforge.net/'
419
+ category: 'Feed Parser'
420
+ producer:
421
+ name: ''
422
+ url: ''
423
+
424
+ - regex: 'meanpathbot'
425
+ name: 'Meanpath Bot'
426
+ category: 'Search bot'
427
+ url: 'http://www.meanpath.com/meanpathbot.html'
428
+ producer:
429
+ name: 'Meanpath'
430
+ url: 'http://www.meanpath.com'
431
+
432
+ - regex: 'MixrankBot'
433
+ name: 'Mixrank Bot'
434
+ category: 'Crawler'
435
+ url: 'http://mixrank.com'
436
+ producer:
437
+ name: 'Online Media Group, Inc.'
438
+ url: ''
439
+
440
+ - regex: 'MJ12bot'
441
+ name: 'MJ12 Bot'
442
+ category: 'Search bot'
443
+ url: 'http://majestic12.co.uk/bot.php'
444
+ producer:
445
+ name: 'Majestic-12'
446
+ url: 'http://majestic12.co.uk'
447
+
448
+ - regex: 'MojeekBot'
449
+ name: 'MojeekBot'
450
+ category: 'Search bot'
451
+ url: 'http://www.mojeek.com/bot.html'
452
+ producer:
453
+ name: 'Mojeek Ltd.'
454
+ url: 'http://www.mojeek.com'
455
+
456
+ - regex: 'NalezenCzBot'
457
+ name: 'NalezenCzBot'
458
+ category: 'Crawler'
459
+ url: 'http://www.nalezen.cz/about-crawler'
460
+ producer:
461
+ name: 'Jaroslav Kuboš'
462
+ url: ''
463
+
464
+ - regex: 'Netcraft Web Server Survey'
465
+ name: 'Netcraft Survey Bot'
466
+ category: 'Search bot'
467
+ url: ''
468
+ producer:
469
+ name: 'Netcraft'
470
+ url: 'http://www.netcraft.com'
471
+
472
+ - regex: 'Netvibes'
473
+ name: 'Netvibes'
474
+ url: 'http://www.netvibes.com/'
475
+ category: 'Feed Fetcher'
476
+ producer:
477
+ name: ''
478
+ url: ''
479
+
480
+ - regex: 'NewsBlur .*(Fetcher|Finder)'
481
+ name: 'NewsBlur'
482
+ url: 'http://www.newsblur.com'
483
+ category: 'Feed Fetcher'
484
+ producer:
485
+ name: ''
486
+ url: ''
487
+
488
+ - regex: 'NewsGatorOnline'
489
+ name: 'NewsGator'
490
+ url: 'http://www.newsgator.com'
491
+ category: 'Feed Fetcher'
492
+ producer:
493
+ name: ''
494
+ url: ''
495
+
496
+ - regex: 'nlcrawler'
497
+ name: 'NLCrawler'
498
+ category: 'Crawler'
499
+ url: ''
500
+ producer:
501
+ name: 'Northern Light'
502
+ url: 'http://northernlight.com'
503
+
504
+ - regex: 'omgilibot'
505
+ name: 'Omgili bot'
506
+ category: 'Search bot'
507
+ url: 'http://www.omgili.com/Crawler.html'
508
+ producer:
509
+ name: 'Omgili'
510
+ url: 'http://www.omgili.com'
511
+
512
+ - regex: 'OpenindexSpider'
513
+ name: 'Openindex Spider'
514
+ category: 'Search bot'
515
+ url: 'http://www.openindex.io/en/webmasters/spider.html'
516
+ producer:
517
+ name: 'Openindex B.V.'
518
+ url: 'http://www.openindex.io'
519
+
520
+ - regex: 'spbot'
521
+ name: 'OpenLinkProfiler'
522
+ category: 'Crawler'
523
+ url: 'http://openlinkprofiler.org/bot'
524
+ producer:
525
+ name: 'Axandra GmbH'
526
+ url: 'http://www.axandra.com'
527
+
528
+ - regex: 'OpenWebSpider'
529
+ name: 'OpenWebSpider'
530
+ category: 'Crawler'
531
+ url: 'http://www.openwebspider.org'
532
+ producer:
533
+ name: 'OpenWebSpider Lab'
534
+ url: 'http://lab.openwebspider.org'
535
+
536
+ - regex: 'PaperLiBot'
537
+ name: 'PaperLiBot'
538
+ category: 'Search bot'
539
+ url: 'http://support.paper.li/entries/20023257-what-is-paper-li'
540
+ producer:
541
+ name: 'Smallrivers SA'
542
+ url: 'http://www.paper.li'
543
+
544
+ - regex: 'psbot(-page)?'
545
+ name: 'Picsearch bot'
546
+ category: 'Search bot'
547
+ url: 'http://www.picsearch.com/bot.html'
548
+ producer:
549
+ name: 'Picsearch'
550
+ url: 'http://www.picsearch.com'
551
+
552
+ - regex: 'Pingdom.com'
553
+ name: 'Pingdom Bot'
554
+ category: 'Site Monitors'
555
+ url: ''
556
+ producer:
557
+ name: 'Pingdom AB'
558
+ url: 'https://www.pingdom.com'
559
+
560
+ - regex: 'QuerySeekerSpider'
561
+ name: 'QuerySeekerSpider'
562
+ category: 'Crawler'
563
+ url: 'http://queryseeker.com/bot.html'
564
+ producer:
565
+ name: 'QueryEye Inc.'
566
+ url: 'http://queryeye.com'
567
+
568
+ - regex: 'redditbot'
569
+ name: 'Reddit Bot'
570
+ category: 'Social Media Agent'
571
+ url: 'http://www.reddit.com/feedback'
572
+ producer:
573
+ name: 'reddit inc.'
574
+ url: 'http://www.reddit.com'
575
+
576
+ - regex: 'rogerbot'
577
+ name: 'Rogerbot'
578
+ category: 'Crawler'
579
+ url: 'http://moz.com/help/pro/what-is-rogerbot-'
580
+ producer:
581
+ name: 'SEOmoz, Inc.'
582
+ url: 'http://moz.com/'
583
+
584
+ - regex: 'Screaming Frog SEO Spider'
585
+ name: 'Screaming Frog SEO Spider'
586
+ category: 'Crawler'
587
+ url: 'http://www.screamingfrog.co.uk/seo-spider'
588
+ producer:
589
+ name: 'Screaming Frog Ltd'
590
+ url: 'http://www.screamingfrog.co.uk'
591
+
592
+ - regex: 'ScreenerBot'
593
+ name: 'ScreenerBot'
594
+ category: 'Crawler'
595
+ url: 'http://www.screenerbot.com'
596
+ producer:
597
+ name: ''
598
+ url: ''
599
+
600
+ - regex: 'SemrushBot'
601
+ name: 'Semrush Bot'
602
+ category: 'Crawler'
603
+ url: 'http://www.semrush.com/bot.html'
604
+ producer:
605
+ name: 'SEMrush'
606
+ url: 'http://www.semrush.com'
607
+
608
+ - regex: 'SensikaBot'
609
+ name: 'Sensika Bot'
610
+ category: ''
611
+ url: ''
612
+ producer:
613
+ name: 'Sensika'
614
+ url: 'http://sensika.com'
615
+
616
+ - regex: 'SEOENG(World)?Bot'
617
+ name: 'SEOENGBot'
618
+ category: 'Crawler'
619
+ url: 'http://www.seoengine.com/seoengbot.htm'
620
+ producer:
621
+ name: 'SEO Engine'
622
+ url: 'http://www.seoengine.com'
623
+
624
+ - regex: 'SeznamBot|SklikBot|Seznam screenshot-generator'
625
+ name: 'Seznam Bot'
626
+ category: 'Search bot'
627
+ url: 'http://www.mapy.cz/cz/seznambot.html'
628
+ producer:
629
+ name: 'Seznam.cz, a.s.'
630
+ url: 'http://www.seznam.cz/'
631
+
632
+ - regex: 'ShopWiki'
633
+ name: 'ShopWiki'
634
+ category: 'Search tools'
635
+ url: 'http://www.shopwiki.com/wiki/Help:Bot'
636
+ producer:
637
+ name: 'ShopWiki Corp.'
638
+ url: 'http://www.shopwiki.com'
639
+
640
+ - regex: 'SilverReader'
641
+ name: 'SilverReader'
642
+ url: 'http://silverreader.com'
643
+ category: 'Feed Fetcher'
644
+ producer:
645
+ name: ''
646
+ url: ''
647
+
648
+ - regex: 'SimplePie'
649
+ name: 'SimplePie'
650
+ url: 'http://www.simplepie.org'
651
+ category: 'Feed Parser'
652
+ producer:
653
+ name: ''
654
+ url: ''
655
+
656
+ - regex: 'SISTRIX Crawler'
657
+ name: 'SISTRIX Crawler'
658
+ category: 'Crawler'
659
+ url: 'http://crawler.sistrix.net'
660
+ producer:
661
+ name: 'SISTRIX GmbH'
662
+ url: 'http://www.sistrix.de'
663
+
664
+ - regex: '(Sogou (web|inst|Pic) spider)|New-Sogou-Spider'
665
+ name: 'Sogou Spider'
666
+ category: 'Search bot'
667
+ url: 'http://www.sogou.com/docs/help/webmasters.htm'
668
+ producer:
669
+ name: 'Sohu, Inc.'
670
+ url: 'http://www.sogou.com'
671
+
672
+ - regex: 'Sosospider|Sosoimagespider'
673
+ name: 'Soso Spider'
674
+ category: 'Search bot'
675
+ url: 'http://help.soso.com/webspider.htm'
676
+ producer:
677
+ name: 'Tencent Holdings'
678
+ url: 'http://www.soso.com'
679
+
680
+ - regex: 'Superfeedr bot'
681
+ name: 'Superfeedr Bot'
682
+ category: 'Feed Fetcher'
683
+ url: ''
684
+ producer:
685
+ name: 'Superfeedr'
686
+ url: 'https://superfeedr.com/'
687
+
688
+ - regex: 'Spinn3r'
689
+ name: 'Spinn3r'
690
+ category: 'Crawler'
691
+ url: 'http://spinn3r.com/robot'
692
+ producer:
693
+ name: 'Tailrank Inc'
694
+ url: 'http://spinn3r.com'
695
+
696
+ - regex: 'Sputnik(Image)?Bot'
697
+ name: 'Sputnik Bot'
698
+ category: ''
699
+ url: ''
700
+ producer:
701
+ name: ''
702
+ url: ''
703
+
704
+ - regex: 'SurveyBot'
705
+ name: 'Survey Bot'
706
+ category: 'Search bot'
707
+ url: 'http://www.domaintools.com/webmasters/surveybot.php'
708
+ producer:
709
+ name: 'Domain Tools'
710
+ url: 'http://www.domaintools.com'
711
+
712
+ - regex: 'TinEye-bot'
713
+ name: 'TinEye Crawler'
714
+ category: 'Search bot'
715
+ url: 'http://www.tineye.com/crawler.html'
716
+ producer:
717
+ name: 'Idée Inc.'
718
+ url: 'http://ideeinc.com'
719
+
720
+ - regex: 'Tiny Tiny RSS'
721
+ name: 'Tiny Tiny RSS'
722
+ url: 'http://tt-rss.org'
723
+ category: 'Feed Fetcher'
724
+ producer:
725
+ name: ''
726
+ url: ''
727
+
728
+ - regex: 'TurnitinBot'
729
+ name: 'TurnitinBot'
730
+ category: 'Crawler'
731
+ url: 'http://www.turnitin.com/robot/crawlerinfo.html'
732
+ producer:
733
+ name: 'iParadigms, LLC.'
734
+ url: 'http://www.turnitin.com'
735
+
736
+ - regex: 'TweetedTimes Bot'
737
+ name: 'TweetedTimes Bot'
738
+ category: 'Crawler'
739
+ url: 'http://tweetedtimes.com'
740
+ producer:
741
+ name: 'TweetedTimes'
742
+ url: 'http://tweetedtimes.com/'
743
+
744
+ - regex: 'TweetmemeBot'
745
+ name: 'Tweetmeme Bot'
746
+ category: 'Crawler'
747
+ url: 'http://tweetmeme.com/'
748
+ producer:
749
+ name: 'Mediasift'
750
+ url: ''
751
+
752
+ - regex: 'Twitterbot'
753
+ name: 'Twitterbot'
754
+ category: 'Social Media Agent'
755
+ url: 'https://dev.twitter.com/docs/cards/getting-started'
756
+ producer:
757
+ name: 'Twitter'
758
+ url: 'http://www.twitter.com'
759
+
760
+ - regex: 'UptimeRobot'
761
+ name: 'Uptime Robot'
762
+ category: 'Site Monitor'
763
+ url: ''
764
+ producer:
765
+ name: 'Uptime Robot'
766
+ url: 'http://uptimerobot.com'
767
+
768
+ - regex: 'URLAppendBot'
769
+ name: 'URLAppendBot'
770
+ category: 'Crawler'
771
+ url: 'http://www.profound.net/urlappendbot.html'
772
+ producer:
773
+ name: 'Profound Networks'
774
+ url: 'http://www.profound.net'
775
+
776
+ - regex: 'VSMCrawler'
777
+ name: 'Visual Site Mapper Crawler'
778
+ category: 'Crawler'
779
+ url: 'http://www.visualsitemapper.com/crawler'
780
+ producer:
781
+ name: 'Alentum Software Ltd.'
782
+ url: 'http://www.alentum.com'
783
+
784
+ - regex: 'VoilaBot'
785
+ name: 'Voila Bot'
786
+ category: 'Search bot'
787
+ url: 'http://www.voila.fr'
788
+ producer:
789
+ name: ''
790
+ url: ''
791
+
792
+ - regex: 'Jigsaw'
793
+ name: 'W3C CSS Validator'
794
+ category: 'Validator'
795
+ url: 'http://jigsaw.w3.org/css-validator'
796
+ producer:
797
+ name: 'W3C'
798
+ url: 'http://www.w3.org'
799
+
800
+ - regex: 'W3C_I18n-Checker'
801
+ name: 'W3C I18N Checker'
802
+ category: 'Validator'
803
+ url: 'http://validator.w3.org/i18n-checker'
804
+ producer:
805
+ name: 'W3C'
806
+ url: 'http://www.w3.org'
807
+
808
+ - regex: 'W3C-checklink'
809
+ name: 'W3C Link Checker'
810
+ category: 'Validator'
811
+ url: 'http://validator.w3.org/checklink'
812
+ producer:
813
+ name: 'W3C'
814
+ url: 'http://www.w3.org'
815
+
816
+ - regex: 'W3C_Validator'
817
+ name: 'W3C Markup Validation Service'
818
+ category: 'Validator'
819
+ url: 'http://validator.w3.org/services'
820
+ producer:
821
+ name: 'W3C'
822
+ url: 'http://www.w3.org'
823
+
824
+ - regex: 'W3C-mobileOK'
825
+ name: 'W3C MobileOK Checker'
826
+ category: 'Validator'
827
+ url: 'http://validator.w3.org/mobile'
828
+ producer:
829
+ name: 'W3C'
830
+ url: 'http://www.w3.org'
831
+
832
+ - regex: 'W3C_Unicorn'
833
+ name: 'W3C Unified Validator'
834
+ category: 'Validator'
835
+ url: 'http://validator.w3.org/unicorn'
836
+ producer:
837
+ name: 'W3C'
838
+ url: 'http://www.w3.org'
839
+
840
+ - regex: 'WeSEE(:Search)?'
841
+ name: 'WeSEE:Search'
842
+ category: 'Search bot'
843
+ url: 'http://www.wesee.com/bot'
844
+ producer:
845
+ name: 'WeSEE Ltd'
846
+ url: 'http://www.wesee.com'
847
+
848
+ - regex: 'WebbCrawler'
849
+ name: 'WebbCrawler'
850
+ category: 'Crawler'
851
+ url: 'http://badcheese.com/crawler.html'
852
+ producer:
853
+ name: 'Steve Webb'
854
+ url: 'http://badcheese.com'
855
+
856
+ - regex: 'Wotbox'
857
+ name: 'Wotbox'
858
+ category: 'Search bot'
859
+ url: 'http://www.wotbox.com/bot/'
860
+ producer:
861
+ name: 'Wotbox'
862
+ url: 'http://www.wotbox.com'
863
+
864
+ - regex: 'yacybot'
865
+ name: 'YaCy'
866
+ category: 'Search bot'
867
+ url: 'http://yacy.net/bot.html'
868
+ producer:
869
+ name: 'YaCy'
870
+ url: 'http://yacy.net'
871
+
872
+ - regex: 'Yahoo! Slurp|Yahoo!-AdCrawler'
873
+ name: 'Yahoo! Slurp'
874
+ category: 'Search bot'
875
+ url: 'http://help.yahoo.com/ysearch/slurp'
876
+ producer:
877
+ name: 'Yahoo! Inc.'
878
+ url: 'http://www.yahoo.com'
879
+
880
+ - regex: 'Yandex(Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|News(links)?|Metrika|.Gazeta Bot)'
881
+ name: 'Yandex Bot'
882
+ category: 'Search bot'
883
+ url: 'http://www.yandex.com/bots'
884
+ producer:
885
+ name: 'Yandex LLC'
886
+ url: 'http://company.yandex.com'
887
+
888
+ - regex: 'Yeti'
889
+ name: 'Yeti/Naverbot'
890
+ category: 'Search bot'
891
+ url: 'http://help.naver.com/robots/'
892
+ producer:
893
+ name: 'Naver'
894
+ url: 'http://www.naver.com'
895
+
896
+ - regex: 'YoudaoBot'
897
+ name: 'Youdao Bot'
898
+ category: 'Search bot'
899
+ url: 'http://www.youdao.com/help/webmaster/spider'
900
+ producer:
901
+ name: 'NetEase, Inc.'
902
+ url: 'http://corp.163.com'
903
+
904
+ - regex: 'YRSpider|YYSpider'
905
+ name: 'Yunyun Bot'
906
+ category: 'Search bot'
907
+ url: 'http://www.yunyun.com/SiteInfo.php?r=about'
908
+ producer:
909
+ name: 'YunYun'
910
+ url: 'http://www.yunyun.com'
911
+
912
+ - regex: 'Zookabot'
913
+ name: 'Zookabot'
914
+ category: 'Crawler'
915
+ url: 'http://zookabot.com'
916
+ producer:
917
+ name: 'Hwacha ApS'
918
+ url: 'http://hwacha.dk'
919
+
920
+ - regex: 'ZumBot'
921
+ name: 'ZumBot'
922
+ category: 'Search bot'
923
+ url: 'http://help.zum.com/inquiry'
924
+ producer:
925
+ name: 'ZUM internet'
926
+ url: 'http://www.zuminternet.com/'
927
+
928
+ - regex: 'YottaaMonitor'
929
+ name: 'Yottaa Site Monitor'
930
+ category: 'Site Monitor'
931
+ url: 'http://www.yottaa.com/products/site-monitor'
932
+ producer:
933
+ name: 'Yottaa'
934
+ url: 'http://www.yottaa.com/'
935
+
936
+
937
+
938
+ - regex: 'lycos'
939
+ name: 'Lycos'
940
+
941
+ - regex: 'Slurp'
942
+ name: 'Inktomi Slurp'
943
+
944
+ - regex: 'Speedy Spider'
945
+ name: 'Speedy'
946
+
947
+ - regex: 'ScoutJet'
948
+ name: 'ScoutJet'
949
+
950
+ - regex: 'nrsbot|netresearch'
951
+ name: 'NetResearchServer'
952
+
953
+ - regex: 'scooter'
954
+ name: 'Scooter'
955
+
956
+ - regex: 'gigabot'
957
+ name: 'Gigabot'
958
+
959
+ - regex: 'charlotte'
960
+ name: 'Charlotte'
961
+
962
+ - regex: 'Pompos'
963
+ name: 'Pompos'
964
+
965
+ - regex: 'ichiro'
966
+ name: 'ichiro'
967
+
968
+ - regex: 'PagePeeker'
969
+ name: 'PagePeeker'
970
+
971
+ - regex: 'WebThumbnail'
972
+ name: 'WebThumbnail'
973
+
974
+ - regex: 'Willow Internet Crawler'
975
+ name: 'Willow Internet Crawler'
976
+
977
+ - regex: 'EmailWolf'
978
+ name: 'EmailWolf'
979
+
980
+
981
+ - regex: '(nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex|zao|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Catchpoint bot|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
982
+ name: 'Bot'
983
+
984
+ # Generic detections
985
+
986
+ - regex: 'Nutch'
987
+ name: 'Nutch-based Bot'
988
+ category: 'crawler'
989
+ url: 'https://nutch.apache.org'
990
+ producer:
991
+ name: 'The Apache Software Foundation'
992
+ url: 'http://www.apache.org/foundation/'
993
+
994
+ # Original:
995
+ # - regex: '[a-z0-9-_]*(bot|crawler|archiver|transcoder|spider)'
996
+ # name: 'Generic Bot'
997
+ #
998
+ # Adapted for Ruby:
999
+ # Note the backslash added in [a-z0-9\-_]; Ruby needs it escaped to avoid
1000
+ # warnings from the interpreter
1001
+ - regex: '[a-z0-9\-_]*(bot|crawler|archiver|transcoder|spider)'
1002
+ name: 'Generic Bot'
@@ -48,6 +48,22 @@ RSpec.describe DeviceDetector do
48
48
 
49
49
  end
50
50
 
51
+ describe '#bot?' do
52
+
53
+ it 'returns false' do
54
+ expect(client.bot?).to eq(false)
55
+ end
56
+
57
+ end
58
+
59
+ describe '#bot_name' do
60
+
61
+ it 'returns nil' do
62
+ expect(client.bot_name).to be_nil
63
+ end
64
+
65
+ end
66
+
51
67
  end
52
68
 
53
69
  context 'unknown user agent' do
@@ -94,5 +110,83 @@ RSpec.describe DeviceDetector do
94
110
 
95
111
  end
96
112
 
113
+ describe '#bot?' do
114
+
115
+ it 'returns false' do
116
+ expect(client.bot?).to eq(false)
117
+ end
118
+
119
+ end
120
+
121
+ describe '#bot_name' do
122
+
123
+ it 'returns nil' do
124
+ expect(client.bot_name).to be_nil
125
+ end
126
+
127
+ end
128
+
129
+ end
130
+
131
+ context 'bot' do
132
+
133
+ let(:user_agent) { 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)' }
134
+
135
+ describe '#name' do
136
+
137
+ it 'returns nil' do
138
+ expect(client.name).to be_nil
139
+ end
140
+
141
+ end
142
+
143
+ describe '#full_version' do
144
+
145
+ it 'returns nil' do
146
+ expect(client.full_version).to be_nil
147
+ end
148
+
149
+ end
150
+
151
+ describe '#os_name' do
152
+
153
+ it 'returns nil' do
154
+ expect(client.os_name).to be_nil
155
+ end
156
+
157
+ end
158
+
159
+ describe '#os_full_version' do
160
+
161
+ it 'returns nil' do
162
+ expect(client.os_full_version).to be_nil
163
+ end
164
+
165
+ end
166
+
167
+ describe '#known?' do
168
+
169
+ it 'returns false' do
170
+ expect(client.known?).to eq(false)
171
+ end
172
+
173
+ end
174
+
175
+ describe '#bot?' do
176
+
177
+ it 'returns true' do
178
+ expect(client.bot?).to eq(true)
179
+ end
180
+
181
+ end
182
+
183
+ describe '#bot_name' do
184
+
185
+ it 'returns the name of the bot' do
186
+ expect(client.bot_name).to eq('Googlebot')
187
+ end
188
+
189
+ end
190
+
97
191
  end
98
192
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: device_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mati Sójka
@@ -60,11 +60,13 @@ files:
60
60
  - Rakefile
61
61
  - device_detector.gemspec
62
62
  - lib/device_detector.rb
63
+ - lib/device_detector/bot.rb
63
64
  - lib/device_detector/client.rb
64
65
  - lib/device_detector/os.rb
65
66
  - lib/device_detector/parser.rb
66
67
  - lib/device_detector/version.rb
67
68
  - lib/device_detector/version_extractor.rb
69
+ - regexes/bots.yml
68
70
  - regexes/browser_engines.yml
69
71
  - regexes/browsers.yml
70
72
  - regexes/feed_readers.yml