device_detector 1.0.3 → 1.0.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (76):
  1. checksums.yaml +5 -5
  2. data/.rubocop.yml +49 -0
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +1 -1
  5. data/CHANGELOG.md +5 -0
  6. data/README.md +4 -4
  7. data/Rakefile +6 -17
  8. data/device_detector.gemspec +1 -0
  9. data/lib/device_detector.rb +17 -28
  10. data/lib/device_detector/bot.rb +2 -2
  11. data/lib/device_detector/client.rb +3 -2
  12. data/lib/device_detector/device.rb +44 -21
  13. data/lib/device_detector/memory_cache.rb +5 -5
  14. data/lib/device_detector/metadata_extractor.rb +7 -8
  15. data/lib/device_detector/model_extractor.rb +3 -3
  16. data/lib/device_detector/name_extractor.rb +2 -2
  17. data/lib/device_detector/os.rb +121 -114
  18. data/lib/device_detector/parser.rb +22 -9
  19. data/lib/device_detector/version.rb +1 -1
  20. data/lib/device_detector/version_extractor.rb +2 -3
  21. data/regexes/bots.yml +228 -12
  22. data/regexes/client/browser_engine.yml +4 -1
  23. data/regexes/client/browsers.yml +358 -34
  24. data/regexes/client/feed_readers.yml +1 -1
  25. data/regexes/client/libraries.yml +16 -1
  26. data/regexes/client/mediaplayers.yml +14 -2
  27. data/regexes/client/mobile_apps.yml +26 -2
  28. data/regexes/client/pim.yml +1 -1
  29. data/regexes/device/cameras.yml +1 -1
  30. data/regexes/device/car_browsers.yml +7 -3
  31. data/regexes/device/consoles.yml +3 -3
  32. data/regexes/device/mobiles.yml +2468 -823
  33. data/regexes/device/portable_media_player.yml +2 -2
  34. data/regexes/device/televisions.yml +1 -1
  35. data/regexes/oss.yml +20 -5
  36. data/regexes/vendorfragments.yml +5 -1
  37. data/spec/device_detector/device_spec.rb +4 -24
  38. data/spec/fixtures/client/browser.yml +632 -2
  39. data/spec/fixtures/client/library.yml +18 -0
  40. data/spec/fixtures/client/mobile_app.yml +24 -0
  41. data/spec/fixtures/detector/bots.yml +533 -67
  42. data/spec/fixtures/detector/camera.yml +20 -0
  43. data/spec/fixtures/detector/car_browser.yml +60 -0
  44. data/spec/fixtures/detector/console.yml +40 -0
  45. data/spec/fixtures/detector/desktop.yml +140 -40
  46. data/spec/fixtures/detector/mediaplayer.yml +43 -0
  47. data/spec/fixtures/detector/mobile_apps.yml +40 -2
  48. data/spec/fixtures/detector/phablet.yml +383 -1
  49. data/spec/fixtures/detector/smart_speaker.yml +55 -0
  50. data/spec/fixtures/detector/smartphone-1.yml +3669 -3695
  51. data/spec/fixtures/detector/smartphone-10.yml +4147 -4094
  52. data/spec/fixtures/detector/smartphone-11.yml +4475 -4473
  53. data/spec/fixtures/detector/smartphone-12.yml +5502 -4251
  54. data/spec/fixtures/detector/smartphone-13.yml +9920 -0
  55. data/spec/fixtures/detector/smartphone-14.yml +2662 -0
  56. data/spec/fixtures/detector/smartphone-2.yml +3738 -3713
  57. data/spec/fixtures/detector/smartphone-3.yml +3970 -3912
  58. data/spec/fixtures/detector/smartphone-4.yml +4518 -4506
  59. data/spec/fixtures/detector/smartphone-5.yml +3645 -3664
  60. data/spec/fixtures/detector/smartphone-6.yml +3868 -3829
  61. data/spec/fixtures/detector/smartphone-7.yml +3992 -3985
  62. data/spec/fixtures/detector/smartphone-8.yml +4510 -4475
  63. data/spec/fixtures/detector/smartphone-9.yml +4226 -4255
  64. data/spec/fixtures/detector/smartphone.yml +3197 -3197
  65. data/spec/fixtures/detector/tablet-1.yml +3940 -3968
  66. data/spec/fixtures/detector/tablet-2.yml +3773 -3764
  67. data/spec/fixtures/detector/tablet-3.yml +5243 -3071
  68. data/spec/fixtures/detector/tablet-4.yml +4528 -0
  69. data/spec/fixtures/detector/tablet.yml +3145 -3125
  70. data/spec/fixtures/detector/tv.yml +1695 -95
  71. data/spec/fixtures/detector/unknown.yml +13 -60
  72. data/spec/fixtures/detector/wearable.yml +61 -0
  73. data/spec/fixtures/device/car_browser.yml +6 -0
  74. data/spec/fixtures/parser/oss.yml +36 -1
  75. data/spec/fixtures/parser/vendorfragments.yml +6 -0
  76. metadata +32 -7
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class ModelExtractor < MetadataExtractor
3
-
4
5
  def call
5
- s = super.to_s.gsub('_',' ').strip
6
+ s = super.to_s.gsub('_', ' ').strip
6
7
  s = s.gsub(/ TD$/i, '')
7
8
 
8
9
  return nil if s == 'Build'
@@ -19,6 +20,5 @@ class DeviceDetector
19
20
  def regex
20
21
  @regex ||= regex_meta[:regex_model] || regex_meta[:regex]
21
22
  end
22
-
23
23
  end
24
24
  end
@@ -1,6 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class NameExtractor < MetadataExtractor
3
-
4
5
  def call
5
6
  if /\$[0-9]/ =~ metadata_string
6
7
  extract_metadata
@@ -14,6 +15,5 @@ class DeviceDetector
14
15
  def metadata_string
15
16
  regex_meta[:name]
16
17
  end
17
-
18
18
  end
19
19
  end
@@ -1,8 +1,9 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
 
3
5
  class DeviceDetector
4
6
  class OS < Parser
5
-
6
7
  def name
7
8
  os_info[:name]
8
9
  end
@@ -38,128 +39,134 @@ class DeviceDetector
38
39
  end
39
40
  end
40
41
 
41
- DESKTOP_OSS = Set.new(['AmigaOS', 'IBM', 'GNU/Linux', 'Mac', 'Unix', 'Windows', 'BeOS', 'Chrome OS'])
42
+ DESKTOP_OSS = Set.new(
43
+ [
44
+ 'AmigaOS', 'IBM', 'GNU/Linux', 'Mac', 'Unix', 'Windows', 'BeOS', 'Chrome OS'
45
+ ]
46
+ )
42
47
 
43
48
  # OS short codes mapped to long names
44
49
  OPERATING_SYSTEMS = {
45
- 'AIX' => 'AIX',
46
- 'AND' => 'Android',
47
- 'AMG' => 'AmigaOS',
48
- 'ATV' => 'Apple TV',
49
- 'ARL' => 'Arch Linux',
50
- 'BTR' => 'BackTrack',
51
- 'SBA' => 'Bada',
52
- 'BEO' => 'BeOS',
53
- 'BLB' => 'BlackBerry OS',
54
- 'QNX' => 'BlackBerry Tablet OS',
55
- 'BMP' => 'Brew',
56
- 'CES' => 'CentOS',
57
- 'COS' => 'Chrome OS',
58
- 'CYN' => 'CyanogenMod',
59
- 'DEB' => 'Debian',
60
- 'DFB' => 'DragonFly',
61
- 'FED' => 'Fedora',
62
- 'FOS' => 'Firefox OS',
63
- 'FIR' => 'Fire OS',
64
- 'BSD' => 'FreeBSD',
65
- 'GNT' => 'Gentoo',
66
- 'GTV' => 'Google TV',
67
- 'HPX' => 'HP-UX',
68
- 'HAI' => 'Haiku OS',
69
- 'IRI' => 'IRIX',
70
- 'INF' => 'Inferno',
71
- 'KOS' => 'KaiOS',
72
- 'KNO' => 'Knoppix',
73
- 'KBT' => 'Kubuntu',
74
- 'LIN' => 'GNU/Linux',
75
- 'LBT' => 'Lubuntu',
76
- 'VLN' => 'VectorLinux',
77
- 'MAC' => 'Mac',
78
- 'MAE' => 'Maemo',
79
- 'MDR' => 'Mandriva',
80
- 'SMG' => 'MeeGo',
81
- 'MCD' => 'MocorDroid',
82
- 'MIN' => 'Mint',
83
- 'MLD' => 'MildWild',
84
- 'MOR' => 'MorphOS',
85
- 'NBS' => 'NetBSD',
86
- 'MTK' => 'MTK / Nucleus',
87
- 'WII' => 'Nintendo',
88
- 'NDS' => 'Nintendo Mobile',
89
- 'OS2' => 'OS/2',
90
- 'T64' => 'OSF1',
91
- 'OBS' => 'OpenBSD',
92
- 'PSP' => 'PlayStation Portable',
93
- 'PS3' => 'PlayStation',
94
- 'RHT' => 'Red Hat',
95
- 'ROS' => 'RISC OS',
96
- 'REM' => 'Remix OS',
97
- 'RZD' => 'RazoDroiD',
98
- 'SAB' => 'Sabayon',
99
- 'SSE' => 'SUSE',
100
- 'SAF' => 'Sailfish OS',
101
- 'SLW' => 'Slackware',
102
- 'SOS' => 'Solaris',
103
- 'SYL' => 'Syllable',
104
- 'SYM' => 'Symbian',
105
- 'SYS' => 'Symbian OS',
106
- 'S40' => 'Symbian OS Series 40',
107
- 'S60' => 'Symbian OS Series 60',
108
- 'SY3' => 'Symbian^3',
109
- 'TDX' => 'ThreadX',
110
- 'TIZ' => 'Tizen',
111
- 'UBT' => 'Ubuntu',
112
- 'WTV' => 'WebTV',
113
- 'WIN' => 'Windows',
114
- 'WCE' => 'Windows CE',
115
- 'WIO' => 'Windows IoT',
116
- 'WMO' => 'Windows Mobile',
117
- 'WPH' => 'Windows Phone',
118
- 'WRT' => 'Windows RT',
119
- 'XBX' => 'Xbox',
120
- 'XBT' => 'Xubuntu',
121
- 'YNS' => 'YunOs',
122
- 'IOS' => 'iOS',
123
- 'POS' => 'palmOS',
124
- 'WOS' => 'webOS'
125
- }
126
-
127
- DOWNCASED_OPERATING_SYSTEMS = OPERATING_SYSTEMS.each_with_object({}){|(short,long),h| h[long.downcase] = short}
50
+ 'AIX' => 'AIX',
51
+ 'AND' => 'Android',
52
+ 'AMG' => 'AmigaOS',
53
+ 'ATV' => 'Apple TV',
54
+ 'ARL' => 'Arch Linux',
55
+ 'BTR' => 'BackTrack',
56
+ 'SBA' => 'Bada',
57
+ 'BEO' => 'BeOS',
58
+ 'BLB' => 'BlackBerry OS',
59
+ 'QNX' => 'BlackBerry Tablet OS',
60
+ 'BMP' => 'Brew',
61
+ 'CES' => 'CentOS',
62
+ 'COS' => 'Chrome OS',
63
+ 'CYN' => 'CyanogenMod',
64
+ 'DEB' => 'Debian',
65
+ 'DFB' => 'DragonFly',
66
+ 'FED' => 'Fedora',
67
+ 'FOS' => 'Firefox OS',
68
+ 'FIR' => 'Fire OS',
69
+ 'BSD' => 'FreeBSD',
70
+ 'GNT' => 'Gentoo',
71
+ 'GTV' => 'Google TV',
72
+ 'HPX' => 'HP-UX',
73
+ 'HAI' => 'Haiku OS',
74
+ 'IRI' => 'IRIX',
75
+ 'INF' => 'Inferno',
76
+ 'KOS' => 'KaiOS',
77
+ 'KNO' => 'Knoppix',
78
+ 'KBT' => 'Kubuntu',
79
+ 'LIN' => 'GNU/Linux',
80
+ 'LBT' => 'Lubuntu',
81
+ 'VLN' => 'VectorLinux',
82
+ 'MAC' => 'Mac',
83
+ 'MAE' => 'Maemo',
84
+ 'MDR' => 'Mandriva',
85
+ 'SMG' => 'MeeGo',
86
+ 'MCD' => 'MocorDroid',
87
+ 'MIN' => 'Mint',
88
+ 'MLD' => 'MildWild',
89
+ 'MOR' => 'MorphOS',
90
+ 'NBS' => 'NetBSD',
91
+ 'MTK' => 'MTK / Nucleus',
92
+ 'WII' => 'Nintendo',
93
+ 'NDS' => 'Nintendo Mobile',
94
+ 'OS2' => 'OS/2',
95
+ 'T64' => 'OSF1',
96
+ 'OBS' => 'OpenBSD',
97
+ 'ORD' => 'Ordissimo',
98
+ 'PSP' => 'PlayStation Portable',
99
+ 'PS3' => 'PlayStation',
100
+ 'RHT' => 'Red Hat',
101
+ 'ROS' => 'RISC OS',
102
+ 'REM' => 'Remix OS',
103
+ 'RZD' => 'RazoDroiD',
104
+ 'SAB' => 'Sabayon',
105
+ 'SSE' => 'SUSE',
106
+ 'SAF' => 'Sailfish OS',
107
+ 'SLW' => 'Slackware',
108
+ 'SOS' => 'Solaris',
109
+ 'SYL' => 'Syllable',
110
+ 'SYM' => 'Symbian',
111
+ 'SYS' => 'Symbian OS',
112
+ 'S40' => 'Symbian OS Series 40',
113
+ 'S60' => 'Symbian OS Series 60',
114
+ 'SY3' => 'Symbian^3',
115
+ 'TDX' => 'ThreadX',
116
+ 'TIZ' => 'Tizen',
117
+ 'TOS' => 'TmaxOS',
118
+ 'UBT' => 'Ubuntu',
119
+ 'WTV' => 'WebTV',
120
+ 'WIN' => 'Windows',
121
+ 'WCE' => 'Windows CE',
122
+ 'WIO' => 'Windows IoT',
123
+ 'WMO' => 'Windows Mobile',
124
+ 'WPH' => 'Windows Phone',
125
+ 'WRT' => 'Windows RT',
126
+ 'XBX' => 'Xbox',
127
+ 'XBT' => 'Xubuntu',
128
+ 'YNS' => 'YunOs',
129
+ 'IOS' => 'iOS',
130
+ 'POS' => 'palmOS',
131
+ 'WOS' => 'webOS'
132
+ }.freeze
133
+
134
+ DOWNCASED_OPERATING_SYSTEMS = OPERATING_SYSTEMS.each_with_object({}) do |(short, long), h|
135
+ h[long.downcase] = short
136
+ end
128
137
 
129
138
  OS_FAMILIES = {
130
- 'Android' => ['AND', 'CYN', 'FIR', 'REM', 'RZD', 'MLD', 'MCD', 'YNS'],
131
- 'AmigaOS' => ['AMG', 'MOR'],
132
- 'Apple TV' => ['ATV'],
133
- 'BlackBerry' => ['BLB', 'QNX'],
134
- 'Brew' => ['BMP'],
135
- 'BeOS' => ['BEO', 'HAI'],
136
- 'Chrome OS' => ['COS'],
137
- 'Firefox OS' => ['FOS', 'KOS'],
138
- 'Gaming Console' => ['WII', 'PS3'],
139
- 'Google TV' => ['GTV'],
140
- 'IBM' => ['OS2'],
141
- 'iOS' => ['IOS'],
142
- 'RISC OS' => ['ROS'],
143
- 'GNU/Linux' => ['LIN', 'ARL', 'DEB', 'KNO', 'MIN', 'UBT', 'KBT', 'XBT', 'LBT', 'FED', 'RHT', 'VLN', 'MDR', 'GNT', 'SAB', 'SLW', 'SSE', 'CES', 'BTR', 'SAF'],
144
- 'Mac' => ['MAC'],
145
- 'Mobile Gaming Console' => ['PSP', 'NDS', 'XBX'],
146
- 'Real-time OS' => ['MTK', 'TDX'],
147
- 'Other Mobile' => ['WOS', 'POS', 'SBA', 'TIZ', 'SMG', 'MAE'],
148
- 'Symbian' => ['SYM', 'SYS', 'SY3', 'S60', 'S40'],
149
- 'Unix' => ['SOS', 'AIX', 'HPX', 'BSD', 'NBS', 'OBS', 'DFB', 'SYL', 'IRI', 'T64', 'INF'],
150
- 'WebTV' => ['WTV'],
151
- 'Windows' => ['WIN'],
152
- 'Windows Mobile' => ['WPH', 'WMO', 'WCE', 'WRT', 'WIO']
153
- }
154
-
155
- FAMILY_TO_OS = OS_FAMILIES.each_with_object({}) do |(family,oss),h|
156
- oss.each{|os| h[os] = family}
139
+ 'Android' => %w[AND CYN FIR REM RZD MLD MCD YNS],
140
+ 'AmigaOS' => %w[AMG MOR],
141
+ 'Apple TV' => ['ATV'],
142
+ 'BlackBerry' => %w[BLB QNX],
143
+ 'Brew' => ['BMP'],
144
+ 'BeOS' => %w[BEO HAI],
145
+ 'Chrome OS' => ['COS'],
146
+ 'Firefox OS' => %w[FOS KOS],
147
+ 'Gaming Console' => %w[WII PS3],
148
+ 'Google TV' => ['GTV'],
149
+ 'IBM' => ['OS2'],
150
+ 'iOS' => ['IOS'],
151
+ 'RISC OS' => ['ROS'],
152
+ 'GNU/Linux' => %w[LIN ARL DEB KNO MIN UBT KBT XBT LBT FED RHT VLN MDR GNT SAB SLW SSE CES BTR SAF ORD TOS],
153
+ 'Mac' => ['MAC'],
154
+ 'Mobile Gaming Console' => %w[PSP NDS XBX],
155
+ 'Real-time OS' => %w[MTK TDX],
156
+ 'Other Mobile' => %w[WOS POS SBA TIZ SMG MAE],
157
+ 'Symbian' => %w[SYM SYS SY3 S60 S40],
158
+ 'Unix' => %w[SOS AIX HPX BSD NBS OBS DFB SYL IRI T64 INF],
159
+ 'WebTV' => ['WTV'],
160
+ 'Windows' => ['WIN'],
161
+ 'Windows Mobile' => %w[WPH WMO WCE WRT WIO]
162
+ }.freeze
163
+
164
+ FAMILY_TO_OS = OS_FAMILIES.each_with_object({}) do |(family, oss), h|
165
+ oss.each { |os| h[os] = family }
157
166
  end
158
167
 
159
168
  def filenames
160
169
  ['oss.yml']
161
170
  end
162
-
163
171
  end
164
-
165
172
  end
@@ -1,7 +1,17 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
- class Parser < Struct.new(:user_agent)
4
+ class Parser
5
+ ROOT = File.expand_path('../..', __dir__)
6
+
7
+ REGEX_CACHE = ::DeviceDetector::MemoryCache.new({})
8
+ private_constant :REGEX_CACHE
9
+
10
+ def initialize(user_agent)
11
+ @user_agent = user_agent
12
+ end
3
13
 
4
- ROOT = File.expand_path('../../..', __FILE__)
14
+ attr_reader :user_agent
5
15
 
6
16
  def name
7
17
  from_cache(['name', self.class.name, user_agent]) do
@@ -32,17 +42,17 @@ class DeviceDetector
32
42
  end
33
43
 
34
44
  def filenames
35
- fail NotImplementedError
45
+ raise NotImplementedError
36
46
  end
37
47
 
38
48
  def filepaths
39
49
  filenames.map do |filename|
40
- [ filename.to_sym, File.join(ROOT, 'regexes', filename) ]
50
+ [filename.to_sym, File.join(ROOT, 'regexes', filename)]
41
51
  end
42
52
  end
43
53
 
44
54
  def regexes_for(file_paths)
45
- from_cache(['regexes', self.class]) do
55
+ REGEX_CACHE.get_or_set(file_paths) do
46
56
  load_regexes(file_paths).flat_map { |path, regex| parse_regexes(path, regex) }
47
57
  end
48
58
  end
@@ -54,16 +64,20 @@ class DeviceDetector
54
64
  def symbolize_keys!(object)
55
65
  case object
56
66
  when Array
57
- object.map!{ |v| symbolize_keys!(v) }
67
+ object.map! { |v| symbolize_keys!(v) }
58
68
  when Hash
59
- object.keys.each{ |k| object[k.to_sym] = symbolize_keys!(object.delete(k)) if k.is_a?(String) }
69
+ keys = object.keys
70
+ keys.each do |k|
71
+ object[k.to_sym] = symbolize_keys!(object.delete(k)) if k.is_a?(String)
72
+ end
60
73
  end
61
74
  object
62
75
  end
63
76
 
64
77
  def parse_regexes(path, raw_regexes)
65
78
  raw_regexes.map do |meta|
66
- fail "invalid device spec: #{meta.inspect}" unless meta[:regex].is_a? String
79
+ raise "invalid device spec: #{meta.inspect}" unless meta[:regex].is_a? String
80
+
67
81
  meta[:regex] = build_regex(meta[:regex])
68
82
  meta[:path] = path
69
83
  meta
@@ -77,6 +91,5 @@ class DeviceDetector
77
91
  def from_cache(key)
78
92
  DeviceDetector.cache.get_or_set(key) { yield }
79
93
  end
80
-
81
94
  end
82
95
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class DeviceDetector
4
- VERSION = '1.0.3'
4
+ VERSION = '1.0.4'
5
5
  end
@@ -1,12 +1,11 @@
1
+ # frozen_string_literal: true
2
+
1
3
  class DeviceDetector
2
4
  class VersionExtractor < MetadataExtractor
3
-
4
5
  private
5
6
 
6
7
  def metadata_string
7
8
  String(regex_meta[:version])
8
9
  end
9
-
10
10
  end
11
11
  end
12
-
@@ -1,7 +1,7 @@
1
1
  ###############
2
2
  # Device Detector - The Universal Device Detection library for parsing User Agents
3
3
  #
4
- # @link http://piwik.org
4
+ # @link https://matomo.org
5
5
  # @license http://www.gnu.org/licenses/lgpl.html LGPL v3 or later
6
6
  ###############
7
7
 
@@ -100,6 +100,14 @@
100
100
  name: 'Sarosys LLC'
101
101
  url: 'http://www.sarosys.com/'
102
102
 
103
+ - regex: 'AspiegelBot'
104
+ name: 'AspiegelBot'
105
+ category: 'Crawler'
106
+ url: 'https://aspiegel.com/'
107
+ producer:
108
+ name: 'Huawei'
109
+ url: 'https://www.huawei.com/'
110
+
103
111
  - regex: 'Castro 2, Episode Duration Lookup'
104
112
  name: 'Castro 2'
105
113
  category: 'Service Agent'
@@ -283,13 +291,13 @@
283
291
  name: 'CloudFlare'
284
292
  url: 'http://www.cloudflare.com'
285
293
 
286
- - regex: 'coccoc/'
294
+ - regex: 'coccoc|coccocbot(-ads|-fast|-image|-shopping|-web)?'
287
295
  name: 'Cốc Cốc Bot'
288
- url: 'http://help.coccoc.com/'
296
+ url: 'https://help.coccoc.com/en/search-engine/coccoc-robots'
289
297
  category: 'Search bot'
290
298
  producer:
291
299
  name: 'Cốc Cốc'
292
- url: 'http://coccoc.com/'
300
+ url: 'https://coccoc.com/'
293
301
 
294
302
  - regex: 'collectd'
295
303
  name: 'Collectd'
@@ -443,7 +451,7 @@
443
451
  name: 'SEOmoz, Inc.'
444
452
  url: 'http://moz.com/'
445
453
 
446
- - regex: 'facebookexternalhit|facebookplatform'
454
+ - regex: 'facebookexternalhit|facebookplatform|facebookexternalua'
447
455
  name: 'Facebook External Hit'
448
456
  category: 'Social Media Agent'
449
457
  url: 'https://www.facebook.com/externalhit_uatext.php'
@@ -577,6 +585,14 @@
577
585
  name: 'Google Inc.'
578
586
  url: 'http://www.google.com'
579
587
 
588
+ - regex: 'Google-Cloud-Scheduler'
589
+ name: 'Google Cloud Scheduler'
590
+ category: 'Crawler'
591
+ url: 'https://cloud.google.com/scheduler'
592
+ producer:
593
+ name: 'Google Inc.'
594
+ url: 'https://www.google.com'
595
+
580
596
  - regex: 'Google-Structured-Data-Testing-Tool'
581
597
  name: 'Google Structured Data Testing Tool'
582
598
  category: 'Validator'
@@ -585,6 +601,14 @@
585
601
  name: 'Google Inc.'
586
602
  url: 'http://www.google.com'
587
603
 
604
+ - regex: 'GoogleStackdriverMonitoring'
605
+ name: 'Google Stackdriver Monitoring'
606
+ category: 'Site Monitor'
607
+ url: 'https://cloud.google.com/monitoring'
608
+ producer:
609
+ name: 'Google Inc.'
610
+ url: 'https://www.google.com'
611
+
588
612
  - regex: 'via ggpht\.com GoogleImageProxy'
589
613
  name: 'Gmail Image Proxy'
590
614
  category: 'Crawler'
@@ -592,7 +616,7 @@
592
616
  producer:
593
617
  name: 'Google Inc.'
594
618
  url: 'http://www.google.com'
595
-
619
+
596
620
  - regex: 'SeznamEmailProxy'
597
621
  name: 'Seznam Email Proxy'
598
622
  category: 'Crawler'
@@ -625,7 +649,7 @@
625
649
  name: 'Visual Meta'
626
650
  url: 'https://www.shopalike.cz/'
627
651
 
628
- - regex: 'Googlebot(-Mobile|-Image|-Video|-News)?|Feedfetcher-Google|Google-Test|Google-Site-Verification|Google Web Preview|AdsBot-Google(-Mobile)?|Google-Adwords-Instant|APIs-Google|Mediapartners-Google|Google.*/\+/web/snippet|GoogleProducer|Google[ -]Publisher[ -]Plugin|Google-Shopping-Quality|Google-Adwords-DisplayAds|Google-Assess|Google-AdWords-Express|Google-speakr|Google-Read-Aloud'
652
+ - regex: 'AdsBot-Google(-Mobile)?|Adwords-(DisplayAds|Express|Instant)|Google Web Preview|Google[ -]Publisher[ -]Plugin|Google-(Adwords|AMPHTML|Assess|HotelAdsVerifier|Read-Aloud|Shopping-Quality|Site-Verification|speakr|Test|Youtube-Links)|(APIs|DuplexWeb|Feedfetcher|Mediapartners)-Google|Googlebot(-Mobile|-Image|-Video|-News)?|GoogleProducer|Google.*/\+/web/snippet'
629
653
  name: 'Googlebot'
630
654
  category: 'Search bot'
631
655
  url: 'http://www.google.com/bot.html'
@@ -1561,6 +1585,14 @@
1561
1585
  name: 'Wotbox'
1562
1586
  url: 'http://www.wotbox.com'
1563
1587
 
1588
+ - regex: 'XenForo'
1589
+ name: 'XenForo'
1590
+ category: 'Service Agent'
1591
+ url: 'https://xenforo.com/'
1592
+ producer:
1593
+ name: 'XenForo Ltd.'
1594
+ url: 'https://xenforo.com/'
1595
+
1564
1596
  - regex: 'yacybot'
1565
1597
  name: 'YaCy'
1566
1598
  category: 'Search bot'
@@ -1593,7 +1625,15 @@
1593
1625
  name: 'Yahoo! Inc.'
1594
1626
  url: 'http://www.yahoo.com'
1595
1627
 
1596
- - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher'
1628
+ - regex: 'Y!J-BRW'
1629
+ name: 'Yahoo! Japan BRW'
1630
+ category: 'Crawler'
1631
+ url: 'https://www.yahoo-help.jp/app/answers/detail/p/595/a_id/42716/~/ウェブページにアクセスするシステムのユーザーエージェントについて'
1632
+ producer:
1633
+ name: 'Yahoo! Japan Corp.'
1634
+ url: 'https://www.yahoo.co.jp/'
1635
+
1636
+ - regex: 'Yandex(SpravBot|ScreenshotBot|MobileBot|AccessibilityBot|ForDomain|Vertis|Market|Catalog|Calendar|Sitelinks|AdNet|Pagechecker|Webmaster|Media|Video|Bot|Images|Antivirus|Direct|Blogs|Favicons|ImageResizer|Verticals|News(links)?|Metrika|\.Gazeta Bot)|YaDirectFetcher|YandexTurbo|YandexTracker|YandexSearchShop|YandexRCA|YandexPartner|YandexOntoDBAPI|YandexOntoDB|YandexMobileScreenShotBot'
1597
1637
  name: 'Yandex Bot'
1598
1638
  category: 'Search bot'
1599
1639
  url: 'http://www.yandex.com/bots'
@@ -1601,7 +1641,7 @@
1601
1641
  name: 'Yandex LLC'
1602
1642
  url: 'http://company.yandex.com'
1603
1643
 
1604
- - regex: 'Yeti'
1644
+ - regex: 'Yeti|NaverJapan'
1605
1645
  name: 'Yeti/Naverbot'
1606
1646
  category: 'Search bot'
1607
1647
  url: 'http://help.naver.com/robots/'
@@ -1683,9 +1723,9 @@
1683
1723
  name: 'HubPages'
1684
1724
  url: 'http://hubpages.com/'
1685
1725
 
1686
- - regex: 'Pinterest/\d\.\d.*www\.pinterest\.com.*'
1726
+ - regex: 'Pinterest(bot)?/\d\.\d.*www\.pinterest\.com.*'
1687
1727
  name: 'Pinterest'
1688
- url: ''
1728
+ url: 'http://www.pinterest.com/bot.html'
1689
1729
  category: 'Crawler'
1690
1730
  producer:
1691
1731
  name: 'Pinterest'
@@ -1805,7 +1845,7 @@
1805
1845
  - regex: 'RSSRadio \(Push Notification Scanner;support@dorada\.co\.uk\)'
1806
1846
  name: 'RSSRadio Bot'
1807
1847
 
1808
- - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|Minimo|RackspaceBot)'
1848
+ - regex: '(A6-Indexer|nuhk|TsolCrawler|Yammybot|Openbot|Gulper Web Bot|grub-client|Download Demon|SearchExpress|Microsoft URL Control|borg|altavista|dataminr.com|tweetedtimes.com|TrendsmapResolver|teoma|blitzbot|oegp|furlbot|http%20client|polybot|htdig|mogimogi|larbin|scrubby|searchsight|seekbot|semanticdiscovery|snappy|vortex(?! Build)|zeal|fast-webcrawler|converacrawler|dataparksearch|findlinks|BrowserMob|HttpMonitor|ThumbShotsBot|URL2PNG|ZooShot|GomezA|Google SketchUp|Read%20Later|RackspaceBot|robots|SeopultContentAnalyzer|7Siters|centuryb.o.t9)'
1809
1849
  name: 'Generic Bot'
1810
1850
 
1811
1851
  - regex: '^sentry'
@@ -1824,6 +1864,182 @@
1824
1864
  name: 'The Knowledge AI'
1825
1865
  category: 'Crawler'
1826
1866
 
1867
+ - regex: 'Embedly'
1868
+ name: 'Embedly'
1869
+ category: 'Crawler'
1870
+ url: 'https://support.embed.ly/hc/en-us'
1871
+ producer:
1872
+ name: 'A Medium, Corp.'
1873
+ url: 'https://medium.com/'
1874
+
1875
+ - regex: 'BrandVerity'
1876
+ name: 'BrandVerity'
1877
+ category: 'Crawler'
1878
+ url: 'https://www.brandverity.com/why-is-brandverity-visiting-me'
1879
+ producer:
1880
+ name: 'BrandVerity, Inc.'
1881
+ url: 'https://www.brandverity.com/'
1882
+
1883
+ - regex: 'Kaspersky Lab CFR link resolver'
1884
+ name: 'Kaspersky'
1885
+ category: 'Security Checker'
1886
+ url: 'https://www.kaspersky.com/'
1887
+ producer:
1888
+ name: 'AO Kaspersky Lab'
1889
+ url: 'https://www.kaspersky.com/'
1890
+
1891
+ - regex: 'eZ Publish Link Validator'
1892
+ name: 'eZ Publish Link Validator'
1893
+ category: 'Crawler'
1894
+ url: 'https://ez.no/'
1895
+ producer:
1896
+ name: 'eZ Systems AS'
1897
+ url: 'https://ez.no/'
1898
+
1899
+ - regex: 'woorankreview'
1900
+ name: 'WooRank'
1901
+ category: 'Search bot'
1902
+ url: 'https://www.woorank.com/'
1903
+ producer:
1904
+ name: 'WooRank sprl'
1905
+ url: 'https://www.woorank.com/'
1906
+
1907
+ - regex: '(Match|LinkCheck) by Siteimprove.com'
1908
+ name: 'Siteimprove'
1909
+ category: 'Search bot'
1910
+ url: 'https://siteimprove.com/'
1911
+ producer:
1912
+ name: 'Siteimprove GmbH'
1913
+ url: 'https://siteimprove.com/'
1914
+
1915
+ - regex: 'CATExplorador'
1916
+ name: 'CATExplorador'
1917
+ category: 'Search bot'
1918
+ url: 'https://fundacio.cat/ca/domini/'
1919
+ producer:
1920
+ name: 'Fundació puntCAT'
1921
+ url: 'https://fundacio.cat/ca/domini/'
1922
+
1923
+ - regex: 'Buck'
1924
+ name: 'Buck'
1925
+ category: 'Search bot'
1926
+ url: 'https://hypefactors.com/'
1927
+ producer:
1928
+ name: 'Hypefactors A/S'
1929
+ url: 'https://hypefactors.com/'
1930
+
1931
+ - regex: 'tracemyfile'
1932
+ name: 'TraceMyFile'
1933
+ category: 'Search bot'
1934
+ url: 'https://www.tracemyfile.com/'
1935
+ producer:
1936
+ name: 'Idee Inc.'
1937
+ url: 'http://ideeinc.com/'
1938
+
1939
+ - regex: 'zelist.ro feed parser'
1940
+ name: 'Ze List'
1941
+ url: 'https://www.zelist.ro/'
1942
+ category: 'Feed Fetcher'
1943
+ producer:
1944
+ name: 'Treeworks SRL'
1945
+ url: 'https://www.tree.ro/'
1946
+
1947
+ - regex: 'weborama-fetcher'
1948
+ name: 'Weborama'
1949
+ category: 'Search bot'
1950
+ url: 'https://weborama.com/'
1951
+ producer:
1952
+ name: 'Weborama SA'
1953
+ url: 'https://weborama.com/'
1954
+
1955
+ - regex: 'BoardReader Favicon Fetcher'
1956
+ name: 'BoardReader'
1957
+ category: 'Search bot'
1958
+ url: 'http://boardreader.com/'
1959
+ producer:
1960
+ name: 'Effyis Inc'
1961
+ url: 'http://boardreader.com/'
1962
+
1963
+ - regex: 'IDG/IT'
1964
+ name: 'IDG/IT'
1965
+ category: 'Search bot'
1966
+ url: 'https://spaziodati.eu/'
1967
+ producer:
1968
+ name: 'SpazioDati S.r.l.'
1969
+ url: 'https://spaziodati.eu/'
1970
+
1971
+ - regex: 'Bytespider'
1972
+ name: 'Bytespider'
1973
+ category: 'Search bot'
1974
+ url: 'https://bytedance.com/'
1975
+ producer:
1976
+ name: 'ByteDance Ltd.'
1977
+ url: 'https://bytedance.com/'
1978
+
1979
+ - regex: 'WikiDo'
1980
+ name: 'WikiDo'
1981
+ category: 'Search bot'
1982
+ url: 'https://www.wikido.com/'
1983
+ producer:
1984
+ name: 'Fotolitografie Fiorentine di Becchi Antonio s.n.c.'
1985
+ url: 'https://www.wikido.com/'
1986
+
1987
+ - regex: 'AwarioSmartBot'
1988
+ name: 'Awario'
1989
+ category: 'Search bot'
1990
+ url: 'https://awario.com/bots.html'
1991
+ producer:
1992
+ name: 'Awario'
1993
+ url: 'https://awario.com/'
1994
+
1995
+ - regex: 'AwarioRssBot'
1996
+ name: 'Awario'
1997
+ category: 'Feed Fetcher'
1998
+ url: 'https://awario.com/bots.html'
1999
+ producer:
2000
+ name: 'Awario'
2001
+ url: 'https://awario.com/'
2002
+
2003
+ - regex: 'oBot'
2004
+ name: 'oBot'
2005
+ category: 'Search bot'
2006
+ url: 'http://www.xforce-security.com/crawler/'
2007
+ producer:
2008
+ name: 'IBM Germany Research & Development GmbH'
2009
+ url: 'https://exchange.xforce.ibmcloud.com/'
2010
+
2011
+ - regex: 'SMTBot'
2012
+ name: 'SMTBot'
2013
+ category: 'Search bot'
2014
+ url: 'https://www.similartech.com/smtbot'
2015
+ producer:
2016
+ name: 'SimilarTech Ltd.'
2017
+ url: 'https://www.similartech.com/'
2018
+
2019
+ - regex: 'LCC'
2020
+ name: 'LCC'
2021
+ category: 'Search bot'
2022
+ url: 'https://corpora.uni-leipzig.de/crawler_faq.html'
2023
+ producer:
2024
+ name: 'Universität Leipzig'
2025
+ url: 'https://www.uni-leipzig.de/'
2026
+
2027
+ - regex: 'Startpagina-Linkchecker'
2028
+ name: 'Startpagina Linkchecker'
2029
+ category: 'Search bot'
2030
+ url: 'https://www.startpagina.nl/linkchecker'
2031
+ producer:
2032
+ name: 'Startpagina B.V.'
2033
+ url: 'https://www.startpagina.nl/'
2034
+
2035
+ - regex: 'GTmetrix'
2036
+ name: 'GTmetrix'
2037
+ category: 'Crawler'
2038
+ url: 'https://gtmetrix.com/'
2039
+ producer:
2040
+ name: 'Carbon60 Operating Co. Ltd.'
2041
+ url: 'https://www.carbon60.com/'
2042
+
1827
2043
  # Generic detections
1828
2044
 
1829
2045
  - regex: 'Nutch'