device_detector 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +1 -1
  5. data/CHANGELOG.md +5 -0
  6. data/README.md +4 -4
  7. data/Rakefile +6 -17
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector.rb +17 -28
  10. data/lib/device_detector/bot.rb +2 -2
  11. data/lib/device_detector/client.rb +3 -2
  12. data/lib/device_detector/device.rb +44 -21
  13. data/lib/device_detector/memory_cache.rb +5 -5
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +121 -114
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +1 -1
  20. data/lib/device_detector/version_extractor.rb +2 -3
  21. data/regexes/bots.yml +228 -12
  22. data/regexes/client/browser_engine.yml +4 -1
  23. data/regexes/client/browsers.yml +358 -34
  24. data/regexes/client/feed_readers.yml +1 -1
  25. data/regexes/client/libraries.yml +16 -1
  26. data/regexes/client/mediaplayers.yml +14 -2
  27. data/regexes/client/mobile_apps.yml +26 -2
  28. data/regexes/client/pim.yml +1 -1
  29. data/regexes/device/cameras.yml +1 -1
  30. data/regexes/device/car_browsers.yml +7 -3
  31. data/regexes/device/consoles.yml +3 -3
  32. data/regexes/device/mobiles.yml +2468 -823
  33. data/regexes/device/portable_media_player.yml +2 -2
  34. data/regexes/device/televisions.yml +1 -1
  35. data/regexes/oss.yml +20 -5
  36. data/regexes/vendorfragments.yml +5 -1
  37. data/spec/device_detector/device_spec.rb +4 -24
  38. data/spec/fixtures/client/browser.yml +632 -2
  39. data/spec/fixtures/client/library.yml +18 -0
  40. data/spec/fixtures/client/mobile_app.yml +24 -0
  41. data/spec/fixtures/detector/bots.yml +533 -67
  42. data/spec/fixtures/detector/camera.yml +20 -0
  43. data/spec/fixtures/detector/car_browser.yml +60 -0
  44. data/spec/fixtures/detector/console.yml +40 -0
  45. data/spec/fixtures/detector/desktop.yml +140 -40
  46. data/spec/fixtures/detector/mediaplayer.yml +43 -0
  47. data/spec/fixtures/detector/mobile_apps.yml +40 -2
  48. data/spec/fixtures/detector/phablet.yml +383 -1
  49. data/spec/fixtures/detector/smart_speaker.yml +55 -0
  50. data/spec/fixtures/detector/smartphone-1.yml +3669 -3695
  51. data/spec/fixtures/detector/smartphone-10.yml +4147 -4094
  52. data/spec/fixtures/detector/smartphone-11.yml +4475 -4473
  53. data/spec/fixtures/detector/smartphone-12.yml +5502 -4251
  54. data/spec/fixtures/detector/smartphone-13.yml +9920 -0
  55. data/spec/fixtures/detector/smartphone-14.yml +2662 -0
  56. data/spec/fixtures/detector/smartphone-2.yml +3738 -3713
  57. data/spec/fixtures/detector/smartphone-3.yml +3970 -3912
  58. data/spec/fixtures/detector/smartphone-4.yml +4518 -4506
  59. data/spec/fixtures/detector/smartphone-5.yml +3645 -3664
  60. data/spec/fixtures/detector/smartphone-6.yml +3868 -3829
  61. data/spec/fixtures/detector/smartphone-7.yml +3992 -3985
  62. data/spec/fixtures/detector/smartphone-8.yml +4510 -4475
  63. data/spec/fixtures/detector/smartphone-9.yml +4226 -4255
  64. data/spec/fixtures/detector/smartphone.yml +3197 -3197
  65. data/spec/fixtures/detector/tablet-1.yml +3940 -3968
  66. data/spec/fixtures/detector/tablet-2.yml +3773 -3764
  67. data/spec/fixtures/detector/tablet-3.yml +5243 -3071
  68. data/spec/fixtures/detector/tablet-4.yml +4528 -0
  69. data/spec/fixtures/detector/tablet.yml +3145 -3125
  70. data/spec/fixtures/detector/tv.yml +1695 -95
  71. data/spec/fixtures/detector/unknown.yml +13 -60
  72. data/spec/fixtures/detector/wearable.yml +61 -0
  73. data/spec/fixtures/device/car_browser.yml +6 -0
  74. data/spec/fixtures/parser/oss.yml +36 -1
  75. data/spec/fixtures/parser/vendorfragments.yml +6 -0
  76. metadata +32 -7
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class ModelExtractor < MetadataExtractor
3
-
4
5
  def call
5
- s = super.to_s.gsub('_',' ').strip
6
+ s = super.to_s.gsub('_', ' ').strip
6
7
  s = s.gsub(/ TD$/i, '')
7
8
 
8
9
  return nil if s == 'Build'
@@ -19,6 +20,5 @@ class DeviceDetector
19
20
  def regex
20
21
  @regex ||= regex_meta[:regex_model] || regex_meta[:regex]
21
22
  end
22
-
23
23
  end
24
24
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class NameExtractor < MetadataExtractor
3
-
4
5
  def call
5
6
  if /\$[0-9]/ =~ metadata_string
6
7
  extract_metadata
@@ -14,6 +15,5 @@ class DeviceDetector
14
15
  def metadata_string
15
16
  regex_meta[:name]
16
17
  end
17
-
18
18
  end
19
19
  end
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  class DeviceDetector
4
6
  class OS < Parser
5
-
6
7
  def name
7
8
  os_info[:name]
8
9
  end
@@ -38,128 +39,134 @@ class DeviceDetector
38
39
  end
39
40
  end
40
41
 
41
- DESKTOP_OSS = Set.new(['AmigaOS', 'IBM', 'GNU/Linux', 'Mac', 'Unix', 'Windows', 'BeOS', 'Chrome OS'])
42
+ DESKTOP_OSS = Set.new(
43
+ [
44
+ 'AmigaOS', 'IBM', 'GNU/Linux', 'Mac', 'Unix', 'Windows', 'BeOS', 'Chrome OS'
45
+ ]
46
+ )
42
47
 
43
48
  # OS short codes mapped to long names
44
49
  OPERATING_SYSTEMS = {
45
- 'AIX' => 'AIX',
46
- 'AND' => 'Android',
47
- 'AMG' => 'AmigaOS',
48
- 'ATV' => 'Apple TV',
49
- 'ARL' => 'Arch Linux',
50
- 'BTR' => 'BackTrack',
51
- 'SBA' => 'Bada',
52
- 'BEO' => 'BeOS',
53
- 'BLB' => 'BlackBerry OS',
54
- 'QNX' => 'BlackBerry Tablet OS',
55
- 'BMP' => 'Brew',
56
- 'CES' => 'CentOS',
57
- 'COS' => 'Chrome OS',
58
- 'CYN' => 'CyanogenMod',
59
- 'DEB' => 'Debian',
60
- 'DFB' => 'DragonFly',
61
- 'FED' => 'Fedora',
62
- 'FOS' => 'Firefox OS',
63
- 'FIR' => 'Fire OS',
64
- 'BSD' => 'FreeBSD',
65
- 'GNT' => 'Gentoo',
66
- 'GTV' => 'Google TV',
67
- 'HPX' => 'HP-UX',
68
- 'HAI' => 'Haiku OS',
69
- 'IRI' => 'IRIX',
70
- 'INF' => 'Inferno',
71
- 'KOS' => 'KaiOS',
72
- 'KNO' => 'Knoppix',
73
- 'KBT' => 'Kubuntu',
74
- 'LIN' => 'GNU/Linux',
75
- 'LBT' => 'Lubuntu',
76
- 'VLN' => 'VectorLinux',
77
- 'MAC' => 'Mac',
78
- 'MAE' => 'Maemo',
79
- 'MDR' => 'Mandriva',
80
- 'SMG' => 'MeeGo',
81
- 'MCD' => 'MocorDroid',
82
- 'MIN' => 'Mint',
83
- 'MLD' => 'MildWild',
84
- 'MOR' => 'MorphOS',
85
- 'NBS' => 'NetBSD',
86
- 'MTK' => 'MTK / Nucleus',
87
- 'WII' => 'Nintendo',
88
- 'NDS' => 'Nintendo Mobile',
89
- 'OS2' => 'OS/2',
90
- 'T64' => 'OSF1',
91
- 'OBS' => 'OpenBSD',
92
- 'PSP' => 'PlayStation Portable',
93
- 'PS3' => 'PlayStation',
94
- 'RHT' => 'Red Hat',
95
- 'ROS' => 'RISC OS',
96
- 'REM' => 'Remix OS',
97
- 'RZD' => 'RazoDroiD',
98
- 'SAB' => 'Sabayon',
99
- 'SSE' => 'SUSE',
100
- 'SAF' => 'Sailfish OS',
101
- 'SLW' => 'Slackware',
102
- 'SOS' => 'Solaris',
103
- 'SYL' => 'Syllable',
104
- 'SYM' => 'Symbian',
105
- 'SYS' => 'Symbian OS',
106
- 'S40' => 'Symbian OS Series 40',
107
- 'S60' => 'Symbian OS Series 60',
108
- 'SY3' => 'Symbian^3',
109
- 'TDX' => 'ThreadX',
110
- 'TIZ' => 'Tizen',
111
- 'UBT' => 'Ubuntu',
112
- 'WTV' => 'WebTV',
113
- 'WIN' => 'Windows',
114
- 'WCE' => 'Windows CE',
115
- 'WIO' => 'Windows IoT',
116
- 'WMO' => 'Windows Mobile',
117
- 'WPH' => 'Windows Phone',
118
- 'WRT' => 'Windows RT',
119
- 'XBX' => 'Xbox',
120
- 'XBT' => 'Xubuntu',
121
- 'YNS' => 'YunOs',
122
- 'IOS' => 'iOS',
123
- 'POS' => 'palmOS',
124
- 'WOS' => 'webOS'
125
- }
126
-
127
- DOWNCASED_OPERATING_SYSTEMS = OPERATING_SYSTEMS.each_with_object({}){|(short,long),h| h[long.downcase] = short}
50
+ 'AIX' => 'AIX',
51
+ 'AND' => 'Android',
52
+ 'AMG' => 'AmigaOS',
53
+ 'ATV' => 'Apple TV',
54
+ 'ARL' => 'Arch Linux',
55
+ 'BTR' => 'BackTrack',
56
+ 'SBA' => 'Bada',
57
+ 'BEO' => 'BeOS',
58
+ 'BLB' => 'BlackBerry OS',
59
+ 'QNX' => 'BlackBerry Tablet OS',
60
+ 'BMP' => 'Brew',
61
+ 'CES' => 'CentOS',
62
+ 'COS' => 'Chrome OS',
63
+ 'CYN' => 'CyanogenMod',
64
+ 'DEB' => 'Debian',
65
+ 'DFB' => 'DragonFly',
66
+ 'FED' => 'Fedora',
67
+ 'FOS' => 'Firefox OS',
68
+ 'FIR' => 'Fire OS',
69
+ 'BSD' => 'FreeBSD',
70
+ 'GNT' => 'Gentoo',
71
+ 'GTV' => 'Google TV',
72
+ 'HPX' => 'HP-UX',
73
+ 'HAI' => 'Haiku OS',
74
+ 'IRI' => 'IRIX',
75
+ 'INF' => 'Inferno',
76
+ 'KOS' => 'KaiOS',
77
+ 'KNO' => 'Knoppix',
78
+ 'KBT' => 'Kubuntu',
79
+ 'LIN' => 'GNU/Linux',
80
+ 'LBT' => 'Lubuntu',
81
+ 'VLN' => 'VectorLinux',
82
+ 'MAC' => 'Mac',
83
+ 'MAE' => 'Maemo',
84
+ 'MDR' => 'Mandriva',
85
+ 'SMG' => 'MeeGo',
86
+ 'MCD' => 'MocorDroid',
87
+ 'MIN' => 'Mint',
88
+ 'MLD' => 'MildWild',
89
+ 'MOR' => 'MorphOS',
90
+ 'NBS' => 'NetBSD',
91
+ 'MTK' => 'MTK / Nucleus',
92
+ 'WII' => 'Nintendo',
93
+ 'NDS' => 'Nintendo Mobile',
94
+ 'OS2' => 'OS/2',
95
+ 'T64' => 'OSF1',
96
+ 'OBS' => 'OpenBSD',
97
+ 'ORD' => 'Ordissimo',
98
+ 'PSP' => 'PlayStation Portable',
99
+ 'PS3' => 'PlayStation',
100
+ 'RHT' => 'Red Hat',
101
+ 'ROS' => 'RISC OS',
102
+ 'REM' => 'Remix OS',
103
+ 'RZD' => 'RazoDroiD',
104
+ 'SAB' => 'Sabayon',
105
+ 'SSE' => 'SUSE',
106
+ 'SAF' => 'Sailfish OS',
107
+ 'SLW' => 'Slackware',
108
+ 'SOS' => 'Solaris',
109
+ 'SYL' => 'Syllable',
110
+ 'SYM' => 'Symbian',
111
+ 'SYS' => 'Symbian OS',
112
+ 'S40' => 'Symbian OS Series 40',
113
+ 'S60' => 'Symbian OS Series 60',
114
+ 'SY3' => 'Symbian^3',
115
+ 'TDX' => 'ThreadX',
116
+ 'TIZ' => 'Tizen',
117
+ 'TOS' => 'TmaxOS',
118
+ 'UBT' => 'Ubuntu',
119
+ 'WTV' => 'WebTV',
120
+ 'WIN' => 'Windows',
121
+ 'WCE' => 'Windows CE',
122
+ 'WIO' => 'Windows IoT',
123
+ 'WMO' => 'Windows Mobile',
124
+ 'WPH' => 'Windows Phone',
125
+ 'WRT' => 'Windows RT',
126
+ 'XBX' => 'Xbox',
127
+ 'XBT' => 'Xubuntu',
128
+ 'YNS' => 'YunOs',
129
+ 'IOS' => 'iOS',
130
+ 'POS' => 'palmOS',
131
+ 'WOS' => 'webOS'
132
+ }.freeze
133
+
134
+ DOWNCASED_OPERATING_SYSTEMS = OPERATING_SYSTEMS.each_with_object({}) do |(short, long), h|
135
+ h[long.downcase] = short
136
+ end
128
137
 
129
138
  OS_FAMILIES = {
130
- 'Android' => ['AND', 'CYN', 'FIR', 'REM', 'RZD', 'MLD', 'MCD', 'YNS'],
131
- 'AmigaOS' => ['AMG', 'MOR'],
132
- 'Apple TV' => ['ATV'],
133
- 'BlackBerry' => ['BLB', 'QNX'],
134
- 'Brew' => ['BMP'],
135
- 'BeOS' => ['BEO', 'HAI'],
136
- 'Chrome OS' => ['COS'],
137
- 'Firefox OS' => ['FOS', 'KOS'],
138
- 'Gaming Console' => ['WII', 'PS3'],
139
- 'Google TV' => ['GTV'],
140
- 'IBM' => ['OS2'],
141
- 'iOS' => ['IOS'],
142
- 'RISC OS' => ['ROS'],
143
- 'GNU/Linux' => ['LIN', 'ARL', 'DEB', 'KNO', 'MIN', 'UBT', 'KBT', 'XBT', 'LBT', 'FED', 'RHT', 'VLN', 'MDR', 'GNT', 'SAB', 'SLW', 'SSE', 'CES', 'BTR', 'SAF'],
144
- 'Mac' => ['MAC'],
145
- 'Mobile Gaming Console' => ['PSP', 'NDS', 'XBX'],
146
- 'Real-time OS' => ['MTK', 'TDX'],
147
- 'Other Mobile' => ['WOS', 'POS', 'SBA', 'TIZ', 'SMG', 'MAE'],
148
- 'Symbian' => ['SYM', 'SYS', 'SY3', 'S60', 'S40'],
149
- 'Unix' => ['SOS', 'AIX', 'HPX', 'BSD', 'NBS', 'OBS', 'DFB', 'SYL', 'IRI', 'T64', 'INF'],
150
- 'WebTV' => ['WTV'],
151
- 'Windows' => ['WIN'],
152
- 'Windows Mobile' => ['WPH', 'WMO', 'WCE', 'WRT', 'WIO']
153
- }
154
-
155
- FAMILY_TO_OS = OS_FAMILIES.each_with_object({}) do |(family,oss),h|
156
- oss.each{|os| h[os] = family}
139
+ 'Android' => %w[AND CYN FIR REM RZD MLD MCD YNS],
140
+ 'AmigaOS' => %w[AMG MOR],
141
+ 'Apple TV' => ['ATV'],
142
+ 'BlackBerry' => %w[BLB QNX],
143
+ 'Brew' => ['BMP'],
144
+ 'BeOS' => %w[BEO HAI],
145
+ 'Chrome OS' => ['COS'],
146
+ 'Firefox OS' => %w[FOS KOS],
147
+ 'Gaming Console' => %w[WII PS3],
148
+ 'Google TV' => ['GTV'],
149
+ 'IBM' => ['OS2'],
150
+ 'iOS' => ['IOS'],
151
+ 'RISC OS' => ['ROS'],
152
+ 'GNU/Linux' => %w[LIN ARL DEB KNO MIN UBT KBT XBT LBT FED RHT VLN MDR GNT SAB SLW SSE CES BTR SAF ORD TOS],
153
+ 'Mac' => ['MAC'],
154
+ 'Mobile Gaming Console' => %w[PSP NDS XBX],
155
+ 'Real-time OS' => %w[MTK TDX],
156
+ 'Other Mobile' => %w[WOS POS SBA TIZ SMG MAE],
157
+ 'Symbian' => %w[SYM SYS SY3 S60 S40],
158
+ 'Unix' => %w[SOS AIX HPX BSD NBS OBS DFB SYL IRI T64 INF],
159
+ 'WebTV' => ['WTV'],
160
+ 'Windows' => ['WIN'],
161
+ 'Windows Mobile' => %w[WPH WMO WCE WRT WIO]
162
+ }.freeze
163
+
164
+ FAMILY_TO_OS = OS_FAMILIES.each_with_object({}) do |(family, oss), h|
165
+ oss.each { |os| h[os] = family }
157
166
  end
158
167
 
159
168
  def filenames
160
169
  ['oss.yml']
161
170
  end
162
-
163
171
  end
164
-
165
172
  end
@@ -1,7 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
- class Parser < Struct.new(:user_agent)
4
+ class Parser
5
+ ROOT = File.expand_path('../..', __dir__)
6
+
7
+ REGEX_CACHE = ::DeviceDetector::MemoryCache.new({})
8
+ private_constant :REGEX_CACHE
9
+
10
+ def initialize(user_agent)
11
+ @user_agent = user_agent
12
+ end
3
13
 
4
- ROOT = File.expand_path('../../..', __FILE__)
14
+ attr_reader :user_agent
5
15
 
6
16
  def name
7
17
  from_cache(['name', self.class.name, user_agent]) do
@@ -32,17 +42,17 @@ class DeviceDetector
32
42
  end
33
43
 
34
44
  def filenames
35
- fail NotImplementedError
45
+ raise NotImplementedError
36
46
  end
37
47
 
38
48
  def filepaths
39
49
  filenames.map do |filename|
40
- [ filename.to_sym, File.join(ROOT, 'regexes', filename) ]
50
+ [filename.to_sym, File.join(ROOT, 'regexes', filename)]
41
51
  end
42
52
  end
43
53
 
44
54
  def regexes_for(file_paths)
45
- from_cache(['regexes', self.class]) do
55
+ REGEX_CACHE.get_or_set(file_paths) do
46
56
  load_regexes(file_paths).flat_map { |path, regex| parse_regexes(path, regex) }
47
57
  end
48
58
  end
@@ -54,16 +64,20 @@ class DeviceDetector
54
64
  def symbolize_keys!(object)
55
65
  case object
56
66
  when Array
57
- object.map!{ |v| symbolize_keys!(v) }
67
+ object.map! { |v| symbolize_keys!(v) }
58
68
  when Hash
59
- object.keys.each{ |k| object[k.to_sym] = symbolize_keys!(object.delete(k)) if k.is_a?(String) }
69
+ keys = object.keys
70
+ keys.each do |k|
71
+ object[k.to_sym] = symbolize_keys!(object.delete(k)) if k.is_a?(String)
72
+ end
60
73
  end
61
74
  object
62
75
  end
63
76
 
64
77
  def parse_regexes(path, raw_regexes)
65
78
  raw_regexes.map do |meta|
66
- fail "invalid device spec: #{meta.inspect}" unless meta[:regex].is_a? String
79
+ raise "invalid device spec: #{meta.inspect}" unless meta[:regex].is_a? String
80
+
67
81
  meta[:regex] = build_regex(meta[:regex])
68
82
  meta[:path] = path
69
83
  meta
@@ -77,6 +91,5 @@ class DeviceDetector
77
91
  def from_cache(key)
78
92
  DeviceDetector.cache.get_or_set(key) { yield }
79
93
  end
80
-
81
94
  end
82
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DeviceDetector
4
- VERSION = '1.0.3'
4
+ VERSION = '1.0.4'
5
5
  end
@@ -1,12 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class VersionExtractor < MetadataExtractor
3
-
4
5
  private
5
6
 
6
7
  def metadata_string
7
8
  String(regex_meta[:version])
8
9
  end
9
-
10
10
  end
11
11
  end
12
-
@@ -1,7 +1,7 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
@@ -100,6 +100,14 @@
100
100
  name: 'Sarosys LLC'
101
101
  url: 'http://www.sarosys.com/'
102
102
 
103
+ - regex: 'AspiegelBot'
104
+ name: 'AspiegelBot'
105
+ category: 'Crawler'
106
+ url: 'https://aspiegel.com/'
107
+ producer:
108
+ name: 'Huawei'
109
+ url: 'https://www.huawei.com/'
110
+
103
111
  - regex: 'Castro 2, Episode Duration Lookup'
104
112
  name: 'Castro 2'
105
113
  category: 'Service Agent'
@@ -283,13 +291,13 @@
283
291
  name: 'CloudFlare'
284
292
  url: 'http://www.cloudflare.com'
285
293
 
286
- - regex: 'coccoc/'
294
+ - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
287
295
  name: 'Cốc Cốc Bot'
288
- url: 'http://help.coccoc.com/'
296
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
289
297
  category: 'Search bot'
290
298
  producer:
291
299
  name: 'Cốc Cốc'
292
- url: 'http://coccoc.com/'
300
+ url: 'https://coccoc.com/'
293
301
 
294
302
  - regex: 'collectd'
295
303
  name: 'Collectd'
@@ -443,7 +451,7 @@
443
451
  name: 'SEOmoz, Inc.'
444
452
  url: 'http://moz.com/'
445
453
 
446
- - regex: 'facebookexternalhit|facebookplatform'
454
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
447
455
  name: 'Facebook External Hit'
448
456
  category: 'Social Media Agent'
449
457
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -577,6 +585,14 @@
577
585
  name: 'Google Inc.'
578
586
  url: 'http://www.google.com'
579
587
 
588
+ - regex: 'Google-Cloud-Scheduler'
589
+ name: 'Google Cloud Scheduler'
590
+ category: 'Crawler'
591
+ url: 'https://cloud.google.com/scheduler'
592
+ producer:
593
+ name: 'Google Inc.'
594
+ url: 'https://www.google.com'
595
+
580
596
  - regex: 'Google-Structured-Data-Testing-Tool'
581
597
  name: 'Google Structured Data Testing Tool'
582
598
  category: 'Validator'
@@ -585,6 +601,14 @@
585
601
  name: 'Google Inc.'
586
602
  url: 'http://www.google.com'
587
603
 
604
+ - regex: 'GoogleStackdriverMonitoring'
605
+ name: 'Google Stackdriver Monitoring'
606
+ category: 'Site Monitor'
607
+ url: 'https://cloud.google.com/monitoring'
608
+ producer:
609
+ name: 'Google Inc.'
610
+ url: 'https://www.google.com'
611
+
588
612
  - regex: 'via ggpht\.com GoogleImageProxy'
589
613
  name: 'Gmail Image Proxy'
590
614
  category: 'Crawler'
@@ -592,7 +616,7 @@
592
616
  producer:
593
617
  name: 'Google Inc.'
594
618
  url: 'http://www.google.com'
595
-
619
+
596
620
  - regex: 'SeznamEmailProxy'
597
621
  name: 'Seznam Email Proxy'
598
622
  category: 'Crawler'
@@ -625,7 +649,7 @@
625
649
  name: 'Visual Meta'
626
650
  url: 'https://www.shopalike.cz/'
627
651
 
628
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|APIs-Google|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality|Google-Adwords-DisplayAds|Google-Assess|Google-AdWords-Express|Google-speakr|Google-Read-Aloud'
652
+ - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
629
653
  name: 'Googlebot'
630
654
  category: 'Search bot'
631
655
  url: 'http://www.google.com/bot.html'
@@ -1561,6 +1585,14 @@
1561
1585
  name: 'Wotbox'
1562
1586
  url: 'http://www.wotbox.com'
1563
1587
 
1588
+ - regex: 'XenForo'
1589
+ name: 'XenForo'
1590
+ category: 'Service Agent'
1591
+ url: 'https://xenforo.com/'
1592
+ producer:
1593
+ name: 'XenForo Ltd.'
1594
+ url: 'https://xenforo.com/'
1595
+
1564
1596
  - regex: 'yacybot'
1565
1597
  name: 'YaCy'
1566
1598
  category: 'Search bot'
@@ -1593,7 +1625,15 @@
1593
1625
  name: 'Yahoo! Inc.'
1594
1626
  url: 'http://www.yahoo.com'
1595
1627
 
1596
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1628
+ - regex: 'Y!J-BRW'
1629
+ name: 'Yahoo! Japan BRW'
1630
+ category: 'Crawler'
1631
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1632
+ producer:
1633
+ name: 'Yahoo! Japan Corp.'
1634
+ url: 'https://www.yahoo.co.jp/'
1635
+
1636
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1597
1637
  name: 'Yandex Bot'
1598
1638
  category: 'Search bot'
1599
1639
  url: 'http://www.yandex.com/bots'
@@ -1601,7 +1641,7 @@
1601
1641
  name: 'Yandex LLC'
1602
1642
  url: 'http://company.yandex.com'
1603
1643
 
1604
- - regex: 'Yeti'
1644
+ - regex: 'Yeti|NaverJapan'
1605
1645
  name: 'Yeti/Naverbot'
1606
1646
  category: 'Search bot'
1607
1647
  url: 'http://help.naver.com/robots/'
@@ -1683,9 +1723,9 @@
1683
1723
  name: 'HubPages'
1684
1724
  url: 'http://hubpages.com/'
1685
1725
 
1686
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1726
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1687
1727
  name: 'Pinterest'
1688
- url: ''
1728
+ url: 'http://www.pinterest.com/bot.html'
1689
1729
  category: 'Crawler'
1690
1730
  producer:
1691
1731
  name: 'Pinterest'
@@ -1805,7 +1845,7 @@
1805
1845
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1806
1846
  name: 'RSSRadio Bot'
1807
1847
 
1808
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1848
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1809
1849
  name: 'Generic Bot'
1810
1850
 
1811
1851
  - regex: '^sentry'
@@ -1824,6 +1864,182 @@
1824
1864
  name: 'The Knowledge AI'
1825
1865
  category: 'Crawler'
1826
1866
 
1867
+ - regex: 'Embedly'
1868
+ name: 'Embedly'
1869
+ category: 'Crawler'
1870
+ url: 'https://support.embed.ly/hc/en-us'
1871
+ producer:
1872
+ name: 'A Medium, Corp.'
1873
+ url: 'https://medium.com/'
1874
+
1875
+ - regex: 'BrandVerity'
1876
+ name: 'BrandVerity'
1877
+ category: 'Crawler'
1878
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1879
+ producer:
1880
+ name: 'BrandVerity, Inc.'
1881
+ url: 'https://www.brandverity.com/'
1882
+
1883
+ - regex: 'Kaspersky Lab CFR link resolver'
1884
+ name: 'Kaspersky'
1885
+ category: 'Security Checker'
1886
+ url: 'https://www.kaspersky.com/'
1887
+ producer:
1888
+ name: 'AO Kaspersky Lab'
1889
+ url: 'https://www.kaspersky.com/'
1890
+
1891
+ - regex: 'eZ Publish Link Validator'
1892
+ name: 'eZ Publish Link Validator'
1893
+ category: 'Crawler'
1894
+ url: 'https://ez.no/'
1895
+ producer:
1896
+ name: 'eZ Systems AS'
1897
+ url: 'https://ez.no/'
1898
+
1899
+ - regex: 'woorankreview'
1900
+ name: 'WooRank'
1901
+ category: 'Search bot'
1902
+ url: 'https://www.woorank.com/'
1903
+ producer:
1904
+ name: 'WooRank sprl'
1905
+ url: 'https://www.woorank.com/'
1906
+
1907
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1908
+ name: 'Siteimprove'
1909
+ category: 'Search bot'
1910
+ url: 'https://siteimprove.com/'
1911
+ producer:
1912
+ name: 'Siteimprove GmbH'
1913
+ url: 'https://siteimprove.com/'
1914
+
1915
+ - regex: 'CATExplorador'
1916
+ name: 'CATExplorador'
1917
+ category: 'Search bot'
1918
+ url: 'https://fundacio.cat/ca/domini/'
1919
+ producer:
1920
+ name: 'Fundació puntCAT'
1921
+ url: 'https://fundacio.cat/ca/domini/'
1922
+
1923
+ - regex: 'Buck'
1924
+ name: 'Buck'
1925
+ category: 'Search bot'
1926
+ url: 'https://hypefactors.com/'
1927
+ producer:
1928
+ name: 'Hypefactors A/S'
1929
+ url: 'https://hypefactors.com/'
1930
+
1931
+ - regex: 'tracemyfile'
1932
+ name: 'TraceMyFile'
1933
+ category: 'Search bot'
1934
+ url: 'https://www.tracemyfile.com/'
1935
+ producer:
1936
+ name: 'Idee Inc.'
1937
+ url: 'http://ideeinc.com/'
1938
+
1939
+ - regex: 'zelist.ro feed parser'
1940
+ name: 'Ze List'
1941
+ url: 'https://www.zelist.ro/'
1942
+ category: 'Feed Fetcher'
1943
+ producer:
1944
+ name: 'Treeworks SRL'
1945
+ url: 'https://www.tree.ro/'
1946
+
1947
+ - regex: 'weborama-fetcher'
1948
+ name: 'Weborama'
1949
+ category: 'Search bot'
1950
+ url: 'https://weborama.com/'
1951
+ producer:
1952
+ name: 'Weborama SA'
1953
+ url: 'https://weborama.com/'
1954
+
1955
+ - regex: 'BoardReader Favicon Fetcher'
1956
+ name: 'BoardReader'
1957
+ category: 'Search bot'
1958
+ url: 'http://boardreader.com/'
1959
+ producer:
1960
+ name: 'Effyis Inc'
1961
+ url: 'http://boardreader.com/'
1962
+
1963
+ - regex: 'IDG/IT'
1964
+ name: 'IDG/IT'
1965
+ category: 'Search bot'
1966
+ url: 'https://spaziodati.eu/'
1967
+ producer:
1968
+ name: 'SpazioDati S.r.l.'
1969
+ url: 'https://spaziodati.eu/'
1970
+
1971
+ - regex: 'Bytespider'
1972
+ name: 'Bytespider'
1973
+ category: 'Search bot'
1974
+ url: 'https://bytedance.com/'
1975
+ producer:
1976
+ name: 'ByteDance Ltd.'
1977
+ url: 'https://bytedance.com/'
1978
+
1979
+ - regex: 'WikiDo'
1980
+ name: 'WikiDo'
1981
+ category: 'Search bot'
1982
+ url: 'https://www.wikido.com/'
1983
+ producer:
1984
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
1985
+ url: 'https://www.wikido.com/'
1986
+
1987
+ - regex: 'AwarioSmartBot'
1988
+ name: 'Awario'
1989
+ category: 'Search bot'
1990
+ url: 'https://awario.com/bots.html'
1991
+ producer:
1992
+ name: 'Awario'
1993
+ url: 'https://awario.com/'
1994
+
1995
+ - regex: 'AwarioRssBot'
1996
+ name: 'Awario'
1997
+ category: 'Feed Fetcher'
1998
+ url: 'https://awario.com/bots.html'
1999
+ producer:
2000
+ name: 'Awario'
2001
+ url: 'https://awario.com/'
2002
+
2003
+ - regex: 'oBot'
2004
+ name: 'oBot'
2005
+ category: 'Search bot'
2006
+ url: 'http://www.xforce-security.com/crawler/'
2007
+ producer:
2008
+ name: 'IBM Germany Research & Development GmbH'
2009
+ url: 'https://exchange.xforce.ibmcloud.com/'
2010
+
2011
+ - regex: 'SMTBot'
2012
+ name: 'SMTBot'
2013
+ category: 'Search bot'
2014
+ url: 'https://www.similartech.com/smtbot'
2015
+ producer:
2016
+ name: 'SimilarTech Ltd.'
2017
+ url: 'https://www.similartech.com/'
2018
+
2019
+ - regex: 'LCC'
2020
+ name: 'LCC'
2021
+ category: 'Search bot'
2022
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2023
+ producer:
2024
+ name: 'Universität Leipzig'
2025
+ url: 'https://www.uni-leipzig.de/'
2026
+
2027
+ - regex: 'Startpagina-Linkchecker'
2028
+ name: 'Startpagina Linkchecker'
2029
+ category: 'Search bot'
2030
+ url: 'https://www.startpagina.nl/linkchecker'
2031
+ producer:
2032
+ name: 'Startpagina B.V.'
2033
+ url: 'https://www.startpagina.nl/'
2034
+
2035
+ - regex: 'GTmetrix'
2036
+ name: 'GTmetrix'
2037
+ category: 'Crawler'
2038
+ url: 'https://gtmetrix.com/'
2039
+ producer:
2040
+ name: 'Carbon60 Operating Co. Ltd.'
2041
+ url: 'https://www.carbon60.com/'
2042
+
1827
2043
  # Generic detections
1828
2044
 
1829
2045
  - regex: 'Nutch'