useragent_parser 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/config/regexes.yaml CHANGED
@@ -79,8 +79,31 @@ user_agent_parsers:
79
79
 
80
80
  # Lightning (for Thunderbird)
81
81
  # http://www.mozilla.org/projects/calendar/lightning/
82
+ - regex: 'Lightning/\d+\.\d+[ab]?\d+[a-z]* (Thunderbird)/(\d+)\.(\d+)\.?(\d+)?'
83
+
82
84
  - regex: '(Lightning)/(\d+)\.(\d+)([ab]?\d+[a-z]*)'
83
85
 
86
+ # Eudora
87
+ - regex: '(Eudora)/(\d+)\.(\d+)\.?(\d+)?'
88
+
89
+ - regex: '(EUDORA)'
90
+ family_replacement: 'Eudora'
91
+
92
+ # T-Online E-Mail
93
+ - regex: '(T-Online eMail) (\d+)\.(\d+)?'
94
+
95
+ # Apple Mail
96
+ - regex: '(Mail)/\d+.*Darwin/10\.5'
97
+ family_replacement: 'Apple Mail'
98
+ v1_replacement: '3'
99
+
100
+ # Sparrow
101
+ - regex: '(Sparrow)/\d+'
102
+ family_replacement: 'Sparrow'
103
+
104
+ # Lotus Notes
105
+ - regex: '(Lotus-Notes)/(\d+)\.(\d+)'
106
+ family_replacement: 'Lotus Notes'
84
107
  # Swiftfox
85
108
  - regex: '(Firefox)/(\d+)\.(\d+)\.(\d+(?:pre)?) \(Swiftfox\)'
86
109
  family_replacement: 'Swiftfox'
@@ -153,21 +176,29 @@ user_agent_parsers:
153
176
  - regex: '(Twitterbot)/(\d+)\.(\d+)'
154
177
  family_replacement: 'TwitterBot'
155
178
 
179
+ # Windows Live Mail / Outlook Express
180
+ - regex: '(Outlook-Express)/(\d+)\.(\d+)'
181
+ family_replacement: 'Windows Live Mail'
182
+
156
183
  #### END SPECIAL CASES TOP ####
157
184
 
158
185
  #### MAIN CASES - this catches > 50% of all browsers ####
159
186
 
160
187
  # Browser/major_version.minor_version.beta_version
161
- - regex: '(AdobeAIR|Chromium|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Shiira|Sunrise|Chrome|Flock|Netscape|Lunascape|WebPilot|Vodafone|NetFront|Netfront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|ThunderBrowse|Iris|UP\.Browser|Bunjaloo|Google Earth|Raven for Mac)/(\d+)\.(\d+)\.(\d+)'
188
+ - regex: '(AdobeAIR|Chromium|FireWeb|Jasmine|ANTGalio|Midori|Fresco|Lobo|PaleMoon|Maxthon|Lynx|OmniWeb|Dillo|Camino|Demeter|Fluid|Fennec|Shiira|Sunrise|Chrome|Flock|Netscape|Lunascape|WebPilot|Vodafone|NetFront|Netfront|Konqueror|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|Opera Mini|iCab|NetNewsWire|Thunderbird|ThunderBrowse|Iris|UP\.Browser|Bunjaloo|Google Earth|Raven for Mac)/(\d+)\.(\d+)\.(\d+)'
162
189
 
163
190
  # Browser/major_version.minor_version
164
- - regex: '(Bolt|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|Vodafone|NetFront|Netfront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|NetNewsWire|Space Bison|Stainless|Orca|Dolfin|BOLT|Minimo|Tizen Browser|Polaris|Abrowser)/(\d+)\.(\d+)'
191
+ - regex: '(Bolt|Jasmine|IceCat|Skyfire|Midori|Maxthon|Lynx|Arora|IBrowse|Dillo|Camino|Shiira|Fennec|Phoenix|Chrome|Flock|Netscape|Lunascape|Epiphany|WebPilot|Opera Mini|Opera|Vodafone|NetFront|Netfront|Konqueror|Googlebot|SeaMonkey|Kazehakase|Vienna|Iceape|Iceweasel|IceWeasel|Iron|K-Meleon|Sleipnir|Galeon|GranParadiso|iCab|NetNewsWire|Space Bison|Stainless|Orca|Dolfin|BOLT|Minimo|Tizen Browser|Polaris|Abrowser|Thunderbird)/(\d+)\.(\d+)'
165
192
 
166
193
  # Browser major_version.minor_version.beta_version (space instead of slash)
167
194
  - regex: '(iRider|Crazy Browser|SkipStone|iCab|Lunascape|Sleipnir|Maemo Browser) (\d+)\.(\d+)\.(\d+)'
168
195
  # Browser major_version.minor_version (space instead of slash)
169
196
  - regex: '(iCab|Lunascape|Opera|Android|Jasmine|Polaris|BREW) (\d+)\.(\d+)\.?(\d+)?'
170
197
 
198
+ # Microsoft Outlook 2007/2010
199
+ - regex: '(MSOffice) (\d+)'
200
+ family_replacement: 'Outlook'
201
+
171
202
  # weird android UAs
172
203
  - regex: '(Android) Donut'
173
204
  v1_replacement: '1'
@@ -319,6 +350,10 @@ user_agent_parsers:
319
350
 
320
351
  - regex: '(Nintendo 3DS).* Version/(\d+)\.(\d+)(?:\.(\w+))'
321
352
 
353
+ # Generic AppleWebKit detection
354
+ - regex: '(AppleWebKit)/(\d+)\.?(\d+)?.*\(KHTML, like Gecko\)'
355
+ family_replacement: 'Apple WebKit'
356
+
322
357
  os_parsers:
323
358
 
324
359
  ##########
@@ -827,3 +862,38 @@ mobile_os_families:
827
862
  - 'Windows Phone 6.5'
828
863
  - 'Windows CE'
829
864
  - 'Symbian OS'
865
+
866
+ referrer_parsers: # tried top to bottom by parse_referrer; the first pattern that matches wins
867
+ - regex: '(mail\.live\.com)'
868
+ referrer_replacement: 'Hotmail'
869
+
870
+ - regex: '(mail\.yahoo\.com)'
871
+ referrer_replacement: 'Yahoo! Mail'
872
+
873
+ - regex: '(email\.freenet\.de)'
874
+ referrer_replacement: 'WEB.DE' # NOTE(review): a freenet.de host is reported as WEB.DE - confirm this is intended
875
+
876
+ - regex: '(mail\.aol\.com)' # unanchored, so webmail.aol.com matches as well
877
+ referrer_replacement: 'AOL Webmail'
878
+
879
+ - regex: '(mail\.google\.com)'
880
+ referrer_replacement: 'Gmail'
881
+
882
+ - regex: '(win\.mail\.ru)'
883
+ referrer_replacement: 'mail.ru'
884
+
885
+ - regex: '(service\.gmx\.net)'
886
+ referrer_replacement: 'GMX'
887
+
888
+ - regex: '(www\.gmxattachments\.net)'
889
+ referrer_replacement: 'GMX'
890
+
891
+ - regex: '(proxy[^\.]+\.bluewin\.ch)'
892
+ referrer_replacement: 'Swisscom'
893
+
894
+ - regex: '(communicator\.strato\.de)'
895
+ referrer_replacement: 'Strato'
896
+
897
+ - regex: '.+' # catch-all: any non-empty referrer that reached this point
898
+ referrer_replacement: 'Other'
899
+
@@ -0,0 +1,34 @@
1
+ # encoding: utf-8
2
+
3
+ module UseragentParser
4
+ class ReferrerParser
5
+ attr_accessor :pattern, :referrer_re, :referrer_replacement
6
+
7
+ def initialize(pattern, referrer_replacement = nil)
8
+ @pattern = pattern
9
+ @referrer_re = Regexp.compile(pattern)
10
+ @referrer_replacement = referrer_replacement
11
+ end
12
+
13
+ def match_spans(referrer_string)
14
+ match_spans = []
15
+ match = @referrer_re.match(referrer_string)
16
+ if match
17
+ # Return the offsets
18
+ end
19
+ end
20
+
21
+ def parse(referrer_string)
22
+ referrer = nil
23
+ match = @referrer_re.match(referrer_string)
24
+ if match
25
+ if @referrer_replacement
26
+ referrer = @referrer_replacement
27
+ else
28
+ referrer = match[1]
29
+ end
30
+ end
31
+ return referrer
32
+ end
33
+ end
34
+ end
@@ -2,6 +2,7 @@ module UseragentParser
2
2
  class UserAgent
3
3
  attr_reader :browser, :browser_family, :browser_version, :browser_major_version, :browser_minor_version, :browser_patch_version
4
4
  attr_reader :os, :os_family, :os_version, :os_major_version, :os_minor_version, :os_patch_version
5
+ attr_reader :webmail_client
5
6
 
6
7
  def initialize(details = {})
7
8
  if user_agent = details['user_agent']
@@ -23,6 +24,10 @@ module UseragentParser
23
24
  @is_mobile = device['is_mobile']
24
25
  @is_spider = device['is_spider']
25
26
  end
27
+
28
+ if referrer = details['referrer']
29
+ @webmail_client = referrer['family']
30
+ end
26
31
  end
27
32
 
28
33
  def device
@@ -53,8 +58,107 @@ module UseragentParser
53
58
  @browser ||= browser_name
54
59
  end
55
60
 
61
+ def email # Mail-client name from email_name, cached in @email (a nil result is recomputed on the next call)
62
+ @email ||= email_name
63
+ end
64
+
65
+ def email_version # Versioned client label from email_version_name, cached in @email_version
66
+ @email_version ||= email_version_name
67
+ end
68
+
69
+ def is_email? # True as soon as one of the five mail-client predicates matches
70
+ is_outlook? || is_ios_mail? || is_apple_mail? || is_desktop_email? || is_webmail?
71
+ end
72
+
56
73
  protected
57
74
 
75
+ def is_desktop_email?
76
+ [
77
+ "Thunderbird",
78
+ "T-Online eMail",
79
+ "Eudora",
80
+ "Apple Mail",
81
+ "Sparrow",
82
+ "Lotus Notes",
83
+ "Windows Live Mail",
84
+ "Outlook",
85
+ "AOL"
86
+ ].include? @browser_family
87
+ end
88
+
89
+ def is_ios_mail? # Mobile Safari on iOS with no webmail client set is treated as the iOS mail app
90
+ @webmail_client.nil? && @browser_family == 'Mobile Safari' && @os_family == 'iOS'
91
+ end
92
+
93
+ def is_apple_mail? # 'Apple WebKit' family on Mac OS X with no webmail client set is treated as Apple Mail
94
+ @webmail_client.nil? && @browser_family == 'Apple WebKit' && @os_family == 'Mac OS X'
95
+ end
96
+
97
+ def is_webmail?
98
+ [
99
+ "Hotmail",
100
+ "Yahoo! Mail",
101
+ "WEB.DE",
102
+ "AOL Webmail",
103
+ "Gmail",
104
+ "GMX",
105
+ "Swisscom",
106
+ "Strato",
107
+ "mail.ru"
108
+ ].include?(@webmail_client)
109
+ end
110
+
111
+ def is_aol? # True for the 'AOL' browser family or the 'AOL Webmail' referrer family
112
+ @browser_family == 'AOL' || @webmail_client == 'AOL Webmail'
113
+ end
114
+
115
+ def is_outlook? # 'Outlook' family, or ANY 'IE' user agent while @webmail_client is nil (e.g. parsed without a referrer)
116
+ @browser_family == 'Outlook' || (@browser_family == 'IE' and @webmail_client.nil?)
117
+ end
118
+
119
+ def email_name # Mail-client name; rules are checked in order and nil is returned when none applies
120
+ return 'Microsoft Outlook' if is_outlook?
121
+ return 'Apple Mail' if is_apple_mail?
122
+ return 'Apple Mobile Mail' if is_ios_mail?
123
+ return @browser_family if is_desktop_email?
124
+ return @webmail_client if is_webmail?
125
+ return @os_family if @os_family == 'Android'
126
+ return 'AOL' if is_aol? # NOTE(review): looks unreachable - 'AOL' and 'AOL Webmail' are already caught by the desktop/webmail rules above; confirm
127
+ end
128
+
129
+ def email_version_name # More specific client label; rules are checked in order, falling back to email_name
130
+ return apple_mail_names if is_apple_mail?
131
+ return ios_mail_names if is_ios_mail?
132
+ return outlook_names if is_outlook?
133
+ return 'AOL Desktop' if @browser_family == 'AOL'
134
+ email_name
135
+ end
136
+
137
+ def apple_mail_names
138
+ case @os_minor_version
139
+ when '0', '1', '2', '3' then 'Apple Mail 1'
140
+ when '4' then 'Apple Mail 2'
141
+ when '5' then 'Apple Mail 3'
142
+ when '6' then 'Apple Mail 4'
143
+ when '7' then 'Apple Mail 5'
144
+ when '8' then 'Apple Mail 6'
145
+ else 'Apple Mail'
146
+ end
147
+ end
148
+
149
+ def ios_mail_names # Returns @device unchanged. NOTE(review): no assignment to @device is visible here - confirm it is set before this runs
150
+ @device
151
+ end
152
+
153
+ def outlook_names
154
+ return 'Outlook 2000/2003/Express' unless @browser_family == 'Outlook'
155
+ case @browser_major_version
156
+ when '12' then 'Outlook 2007'
157
+ when '14' then 'Outlook 2010'
158
+ else 'Outlook'
159
+ end
160
+ end
161
+
58
162
  def browser_name
59
163
  "#{browser_family} #{browser_version}"
60
164
  end
@@ -1,3 +1,3 @@
1
1
  module UseragentParser
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.0"
3
3
  end
@@ -2,12 +2,15 @@ require "useragent_parser/version"
2
2
  require "useragent_parser/parsers/user_agent_parser"
3
3
  require "useragent_parser/parsers/os_parser"
4
4
  require "useragent_parser/parsers/device_parser"
5
+ require "useragent_parser/parsers/referrer_parser"
5
6
  require "useragent_parser/user_agent"
7
+ require "yaml"
6
8
 
7
9
  module UseragentParser
8
10
  USER_AGENT_PARSERS = []
9
11
  OS_PARSERS = []
10
12
  DEVICE_PARSERS = []
13
+ REFERRER_PARSERS = []
11
14
  MOBILE_USER_AGENT_FAMILIES = []
12
15
  MOBILE_OS_FAMILIES = []
13
16
 
@@ -35,12 +38,36 @@ module UseragentParser
35
38
  DEVICE_PARSERS.push UseragentParser::DeviceParser.new(regex, device_replacement)
36
39
  end
37
40
 
41
+ yaml['referrer_parsers'].each do |parser|
42
+ regex = parser['regex']
43
+ referrer_replacement = parser.fetch('referrer_replacement', nil)
44
+
45
+ REFERRER_PARSERS.push UseragentParser::ReferrerParser.new(regex, referrer_replacement)
46
+ end
47
+
38
48
  MOBILE_USER_AGENT_FAMILIES.push *yaml['mobile_user_agent_families']
39
49
  MOBILE_OS_FAMILIES.push *yaml['mobile_os_families']
40
50
  end
41
51
 
42
- def self.parse_all(user_agent_string, *js_args)
43
- # UseragentParser::UserAgent.new{
52
+ def self.parse(user_agent_string, *js_args) # Wraps the details hash from parse_browser in a UserAgent
53
+ UseragentParser::UserAgent.new(self.parse_browser(user_agent_string, *js_args))
54
+ end
55
+
56
+ def self.parse_with_referrer(user_agent_string, referrer = nil, *js_args) # Wraps the details hash from parse_email (includes the parsed referrer) in a UserAgent
57
+ UseragentParser::UserAgent.new(self.parse_email(user_agent_string, referrer, *js_args))
58
+ end
59
+
60
+ def self.parse_email(user_agent_string, referrer = nil, *js_args)
61
+ {
62
+ 'user_agent' => self.parse_user_agent(user_agent_string, *js_args),
63
+ 'os' => self.parse_os(user_agent_string, *js_args),
64
+ 'device' => self.parse_device(user_agent_string, *js_args),
65
+ 'string' => user_agent_string,
66
+ 'referrer' => self.parse_referrer(referrer)
67
+ }
68
+ end
69
+
70
+ def self.parse_browser(user_agent_string, *js_args)
44
71
  {
45
72
  'user_agent' => self.parse_user_agent(user_agent_string, *js_args),
46
73
  'os' => self.parse_os(user_agent_string, *js_args),
@@ -108,6 +135,16 @@ module UseragentParser
108
135
 
109
136
  { 'family' => device, 'is_mobile' => is_mobile, 'is_spider' => (device == 'Spider') }
110
137
  end
138
+
139
+ def self.parse_referrer(referrer_string)
140
+ referrer = nil
141
+ REFERRER_PARSERS.each do |parser|
142
+ referrer = parser.parse(referrer_string)
143
+ break unless referrer.nil?
144
+ end
145
+
146
+ { 'family' => referrer }
147
+ end
111
148
  end
112
149
 
113
150
  UseragentParser.load_parsers!
@@ -3925,14 +3925,14 @@ test_cases:
3925
3925
  minor:
3926
3926
  patch:
3927
3927
  - user_agent_string: "Mozilla/4.0 (compatible; Lotus-Notes/5.0; Windows-NT)"
3928
- family: "Other"
3929
- major:
3930
- minor:
3928
+ family: "Lotus Notes"
3929
+ major: '5'
3930
+ minor: '0'
3931
3931
  patch:
3932
3932
  - user_agent_string: "Mozilla/4.0 (compatible; Lotus-Notes/6.0; Windows-NT)"
3933
- family: "Other"
3934
- major:
3935
- minor:
3933
+ family: "Lotus Notes"
3934
+ major: '6'
3935
+ minor: '0'
3936
3936
  patch:
3937
3937
  - user_agent_string: "LTH/3.02a (http://www.learntohack.nil)"
3938
3938
  family: "Other"
@@ -5185,13 +5185,13 @@ test_cases:
5185
5185
  minor:
5186
5186
  patch:
5187
5187
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/103u (KHTML, like Gecko) safari/100"
5188
- family: "Other"
5189
- major:
5188
+ family: "Apple WebKit"
5189
+ major: '103'
5190
5190
  minor:
5191
5191
  patch:
5192
5192
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/124 (KHTML, like Gecko)"
5193
- family: "Other"
5194
- major:
5193
+ family: "Apple WebKit"
5194
+ major: '124'
5195
5195
  minor:
5196
5196
  patch:
5197
5197
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/124 (KHTML, like Gecko, Safari) Shiira/0.9.1"
@@ -5235,8 +5235,8 @@ test_cases:
5235
5235
  minor:
5236
5236
  patch:
5237
5237
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/124 (KHTML, like Gecko) safari/125.0"
5238
- family: "Other"
5239
- major:
5238
+ family: "Apple WebKit"
5239
+ major: '124'
5240
5240
  minor:
5241
5241
  patch:
5242
5242
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-us) AppleWebKit/125.2 (KHTML, like Gecko, Safari) Shiira/0.9.2.2"
@@ -5265,18 +5265,18 @@ test_cases:
5265
5265
  minor: '34'
5266
5266
  patch:
5267
5267
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/85 (KHTML, like Gecko) OmniWeb/v496"
5268
- family: "Other"
5269
- major:
5268
+ family: "Apple WebKit"
5269
+ major: '85'
5270
5270
  minor:
5271
5271
  patch:
5272
5272
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/85 (KHTML, like Gecko) OmniWeb/v549"
5273
- family: "Other"
5274
- major:
5273
+ family: "Apple WebKit"
5274
+ major: '85'
5275
5275
  minor:
5276
5276
  patch:
5277
5277
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/85 (KHTML, like Gecko) OmniWeb/v558"
5278
- family: "Other"
5279
- major:
5278
+ family: "Apple WebKit"
5279
+ major: '85'
5280
5280
  minor:
5281
5281
  patch:
5282
5282
  - user_agent_string: "Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US) AppleWebKit/85 (KHTML, like Gecko) OmniWeb/v558.43"
@@ -0,0 +1,40 @@
1
+ test_cases:
2
+
3
+ - referrer_string: 'http://bl131w.blu131.mail.live.com/mail/InboxLight.aspx?n=1166653403'
4
+ family: 'Hotmail'
5
+
6
+ - referrer_string: 'http://de.mc250.mail.yahoo.com/mc/welcome?.gx=1&.tm=1290672405&.rand=4rtrlgfb1p8na'
7
+ family: 'Yahoo! Mail'
8
+
9
+ - referrer_string: 'http://email.freenet.de/Email/View/Body?msg=4681&folder=INBOX&showImages=0&showImages=1&updateShowImagesStatus=1'
10
+ family: 'WEB.DE'
11
+
12
+ - referrer_string: 'http://webmail.aol.com/42951/aol/en-us/Suite.aspx'
13
+ family: 'AOL Webmail'
14
+
15
+ - referrer_string: 'http://mail.aol.com/32945-111/aol-1/de-de/Lite/MsgRead.aspx?folder=Spam&uid=1.27991476&seq=1&searchIn=none&searchQuery=&start=0&sort=received'
16
+ family: 'AOL Webmail'
17
+
18
+ - referrer_string: 'http://mail.google.com/mail/?ui=2&view=bsp&ver=ohhl4rw8mbn4'
19
+ family: 'Gmail'
20
+
21
+ - referrer_string: 'http://service.gmx.net/de/cgi/g.fcgi/mail/print?folder=inbox&uid=NDI5OMAkFGxkb%2FUN4WZuvoB2anv6Llsc&CUSTOMERNO=8421009&t=de730856557.1294240410.14bd9d8'
22
+ family: 'GMX'
23
+
24
+ - referrer_string: 'http://www.gmxattachments.net/de/cgi/g.fcgi/mail/print/fullhtml?mid=babgebac.1290790887.16270.a7fshhjuag.74&uid=ZGllQYdlCG9ydask92VvuYdkdFl2iYWj&partUid=ZGllQYdlCG9ydask92VvuYdkdFl2iYWj'
25
+ family: 'GMX'
26
+
27
+ - referrer_string: 'http://proxy-mssazhh.bluewin.ch/mail/MessageRead?sid='
28
+ family: 'Swisscom'
29
+
30
+ - referrer_string: 'http://communicator.strato.de/messages/mail_preview.html?msgNo=964'
31
+ family: 'Strato'
32
+
33
+ - referrer_string: 'http://win.mail.ru/cgi-bin/msglist'
34
+ family: 'mail.ru'
35
+
36
+ - referrer_string: 'http://www.example.com'
37
+ family: 'Other'
38
+
39
+ - referrer_string: ''
40
+ family: