logstash-filter-useragent 3.2.2-java → 3.3.2-java

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +1,8 @@
1
1
  # encoding: utf-8
2
- require "java"
3
2
  require "logstash-filter-useragent_jars"
4
3
  require "logstash/filters/base"
5
4
  require "logstash/namespace"
6
- require "tempfile"
7
- require "thread"
5
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
8
6
 
9
7
  # Parse user agent strings into structured data based on BrowserScope data
10
8
  #
@@ -16,6 +14,8 @@ require "thread"
16
14
  # <https://github.com/tobie/ua-parser/>.
17
15
  class LogStash::Filters::UserAgent < LogStash::Filters::Base
18
16
 
17
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
18
+
19
19
  config_name "useragent"
20
20
 
21
21
  # The field containing the user agent string. If this field is an
@@ -25,7 +25,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
25
25
  # The name of the field to assign user agent data into.
26
26
  #
27
27
  # If not specified user agent data will be stored in the root of the event.
28
- config :target, :validate => :string
28
+ config :target, :validate => :string # default [user_agent] in ECS mode
29
29
 
30
30
  # `regexes.yaml` file to use
31
31
  #
@@ -37,7 +37,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
37
37
  config :regexes, :validate => :string
38
38
 
39
39
  # A string to prepend to all of the extracted keys
40
- config :prefix, :validate => :string, :default => ''
40
+ config :prefix, :validate => :string, :default => '' # not supported in ECS mode
41
41
 
42
42
  # UA parsing is surprisingly expensive. This filter uses an LRU cache to take advantage of the fact that
43
43
  # user agents are often found adjacent to one another in log files and rarely have a random distribution.
@@ -55,7 +55,46 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
55
55
  # number of cache misses and waste memory.
56
56
  config :lru_cache_size, :validate => :number, :default => 100_000
57
57
 
58
# Builds the filter and pre-computes every event field reference it will write to.
#
# The field layout depends on the ECS compatibility mode:
# * disabled - fields live under the (optional) target and carry the configured prefix
# * v1       - fields follow the ECS `user_agent` layout; version parts that have no ECS
#              equivalent are written under `[@metadata][filter][user_agent]`
#
# @param params forwarded unchanged to the parent constructor
def initialize(*params)
  super

  # make target in the format [field name] if defined, i.e. surrounded by brackets
  target = @target || ecs_select[disabled: '', v1: '[user_agent]']
  # wrap the value that was just checked (the local), not @target, so the two can never diverge
  target = "[#{target}]" if !target.empty? && target !~ /^\[[^\[\]]+\]$/

  @name_field = ecs_select[disabled: "[#{@prefix}name]", v1: '[name]']
  @name_field = "#{target}#{@name_field}"

  @device_name_field = ecs_select[disabled: "[#{@prefix}device]", v1: '[device][name]']
  @device_name_field = "#{target}#{@device_name_field}"

  @version_field = ecs_select[disabled: "[#{@prefix}version]", v1: '[version]']
  @version_field = "#{target}#{@version_field}"
  @major_field = ecs_select[disabled: "#{target}[#{@prefix}major]", v1: "[@metadata][filter][user_agent][version][major]"]
  @minor_field = ecs_select[disabled: "#{target}[#{@prefix}minor]", v1: "[@metadata][filter][user_agent][version][minor]"]
  @patch_field = ecs_select[disabled: "#{target}[#{@prefix}patch]", v1: "[@metadata][filter][user_agent][version][patch]"]

  @os_full_name_field = ecs_select[disabled: "[#{@prefix}os_full]", v1: '[os][full]'] # did not exist in legacy prior to ECS-ification
  @os_full_name_field = "#{target}#{@os_full_name_field}"

  @os_name_field = ecs_select[disabled: "[#{@prefix}os_name]", v1: '[os][name]']
  @os_name_field = "#{target}#{@os_name_field}"
  @legacy_os_field = ecs_select[disabled: "#{target}[#{@prefix}os]", v1: nil] # same as [os_name] in legacy mode

  @os_version_field = ecs_select[disabled: "[#{@prefix}os_version]", v1: '[os][version]']
  @os_version_field = "#{target}#{@os_version_field}"
  @os_major_field = ecs_select[disabled: "#{target}[#{@prefix}os_major]", v1: "[@metadata][filter][user_agent][os][version][major]"]
  @os_minor_field = ecs_select[disabled: "#{target}[#{@prefix}os_minor]", v1: "[@metadata][filter][user_agent][os][version][minor]"]
  @os_patch_field = ecs_select[disabled: "#{target}[#{@prefix}os_patch]", v1: "[@metadata][filter][user_agent][os][version][patch]"]

  # NOTE: unfortunately we can not reliably provide `user_agent.original` since the patterns do not
  # reliably give back the matched group and they support the UA string prefixed and/or suffixed
end
93
+
58
94
  def register
95
+ if ecs_compatibility != :disabled && @prefix && !@prefix.empty?
96
+ @logger.warn "Field prefix isn't supported in ECS compatibility mode, please remove `prefix => #{@prefix.inspect}`"
97
+ end
59
98
 
60
99
  if @regexes.nil?
61
100
  @parser = org.logstash.uaparser.CachingParser.new(lru_cache_size)
@@ -63,21 +102,6 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
63
102
  @logger.debug("Using user agent regexes", :regexes => @regexes)
64
103
  @parser = org.logstash.uaparser.CachingParser.new(@regexes, lru_cache_size)
65
104
  end
66
-
67
- # make @target in the format [field name] if defined, i.e. surrounded by brakets
68
- normalized_target = (@target && @target !~ /^\[[^\[\]]+\]$/) ? "[#{@target}]" : ""
69
-
70
- # predefine prefixed field names
71
- @prefixed_name = "#{normalized_target}[#{@prefix}name]"
72
- @prefixed_os = "#{normalized_target}[#{@prefix}os]"
73
- @prefixed_os_name = "#{normalized_target}[#{@prefix}os_name]"
74
- @prefixed_os_major = "#{normalized_target}[#{@prefix}os_major]"
75
- @prefixed_os_minor = "#{normalized_target}[#{@prefix}os_minor]"
76
- @prefixed_device = "#{normalized_target}[#{@prefix}device]"
77
- @prefixed_major = "#{normalized_target}[#{@prefix}major]"
78
- @prefixed_minor = "#{normalized_target}[#{@prefix}minor]"
79
- @prefixed_patch = "#{normalized_target}[#{@prefix}patch]"
80
- @prefixed_build = "#{normalized_target}[#{@prefix}build]"
81
105
  end
82
106
 
83
107
  def filter(event)
@@ -88,60 +112,127 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
88
112
 
89
113
  begin
90
114
  ua_data = lookup_useragent(useragent)
91
- rescue StandardError => e
92
- @logger.error("Uknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
115
+ rescue => e
116
+ @logger.error("Unknown error while parsing user agent data",
117
+ :exception => e.class, :message => e.message, :backtrace => e.backtrace,
118
+ :field => @source, :event => event.to_hash)
93
119
  return
94
120
  end
95
121
 
96
122
  return unless ua_data
97
123
 
98
124
  event.remove(@source) if @target == @source
99
- set_fields(event, ua_data)
125
+ set_fields(event, useragent, ua_data)
100
126
 
101
127
  filter_matched(event)
102
128
  end
103
129
 
104
- # should be private but need to stay public for specs
105
- # TODO: (colin) the related specs should be refactored to not rely on private methods.
106
- def lookup_useragent(useragent)
107
- return unless useragent
130
+ private
108
131
 
109
- # the UserAgentParser::Parser class is not thread safe, indications are that it is probably
110
- # caused by the underlying JRuby regex code that is not thread safe.
111
- # see https://github.com/logstash-plugins/logstash-filter-useragent/issues/25
132
+ def lookup_useragent(useragent) # hands the raw UA string to @parser and returns whatever it yields
112
133
  @parser.parse(useragent)
113
134
  end
114
135
 
115
- private
116
-
117
- def set_fields(event, ua_data)
118
- # UserAgentParser outputs as US-ASCII.
119
-
120
- event.set(@prefixed_name, ua_data.userAgent.family.dup.force_encoding(Encoding::UTF_8))
121
-
122
- #OSX, Android and maybe iOS parse correctly, ua-agent parsing for Windows does not provide this level of detail
136
# Writes the parsed User-Agent details (name, device, version parts, OS data)
# onto the event, using the field references prepared in the constructor.
#
# @param event the event to enrich
# @param ua_source the raw User-Agent string, passed on to set_version
# @param ua_data the parse result exposing userAgent, device and os
def set_fields(event, ua_source, ua_data)
  # UserAgentParser strings are US-ASCII

  agent = ua_data.userAgent
  event.set(@name_field, duped_string(agent.family))

  device = ua_data.device
  event.set(@device_name_field, duped_string(device)) if device

  [[@major_field, agent.major], [@minor_field, agent.minor], [@patch_field, agent.patch]].each do |field, part|
    event.set(field, duped_string(part)) if part
  end
  set_version(event, ua_source, agent) # UA version string e.g. "89.0.4389.90"

  os = ua_data.os
  return unless os

  # os.major, os.minor, ... are all strings; major may be e.g. 'Vista' or '10'
  [[@os_major_field, os.major], [@os_minor_field, os.minor], [@os_patch_field, os.patch]].each do |field, part|
    event.set(field, duped_string(part)) if part
  end
  os_version = build_os_version(os)
  event.set(@os_version_field, os_version) if os_version

  family = os.family
  return unless family

  family = duped_string(family)
  event.set(@os_name_field, family)
  # each field gets its own copy so the values never share a string instance
  event.set(@legacy_os_field, family.dup) if @legacy_os_field
  full_name = family.dup
  full_name << ' ' << os_version if os_version
  event.set(@os_full_name_field, full_name)
end
+ end
123
168
 
124
- # Calls in here use #dup because there's potential for later filters to modify these values
125
- # and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
126
- if (os = ua_data.os)
127
- # The OS is a rich object
128
- event.set(@prefixed_os, ua_data.os.family.dup.force_encoding(Encoding::UTF_8))
129
- event.set(@prefixed_os_name, os.family.dup.force_encoding(Encoding::UTF_8)) if os.family
130
-
131
- # These are all strings
132
- if os.minor && os.major
133
- event.set(@prefixed_os_major, os.major.dup.force_encoding(Encoding::UTF_8)) if os.major
134
- event.set(@prefixed_os_minor, os.minor.dup.force_encoding(Encoding::UTF_8)) if os.minor
169
# Reconstructs the dotted User-Agent version string from its parsed parts and
# stores it in @version_field. Nothing is written when no major part is known.
#
# @param event the event to enrich
# @param ua_source the raw User-Agent string
# @param ua the parsed user agent exposing major, minor, patch and patchMinor
def set_version(event, ua_source, ua)
  return unless @version_field && ua.major

  # only Chrome has all 4 segments, while Firefox only uses major.minor;
  # a segment only counts when every segment before it is present
  segments = [ua.minor, ua.patch, ua.patchMinor].take_while { |segment| segment }
  version = segments.inject(duped_string(ua.major)) { |built, segment| built << '.' << segment }

  # minor and patch are known but patchMinor is not: try to recover it from the raw string
  if segments.size == 2
    version = check_and_adjust_version(ua_source, version) || version
  end

  event.set(@version_field, version)
end
137
189
 
138
- event.set(@prefixed_device, ua_data.device.to_s.dup.force_encoding(Encoding::UTF_8)) if ua_data.device
190
# Completes a reconstructed User-Agent version with a trailing patch-minor
# segment when the raw UA string carries one right after the version.
#
# @param ua_source [String] the raw User-Agent string
# @param version [String, nil] the dotted version built from the parsed parts
# @return [String, nil] nil when version is nil or does not occur verbatim in
#   ua_source; otherwise version, extended by ".<integer>" when such a
#   segment directly follows it
def check_and_adjust_version(ua_source, version)
  # only adjust the version if it's not 'interpreted' (contained in UA string)
  return nil unless version

  start = ua_source.index(version)
  return nil unless start

  tail = start + version.size
  # we built the version with dots, so only a following '.' can introduce another segment
  return version unless ua_source[tail] == '.'

  # the segment runs up to the next space, or to the end of the string when the
  # version is the last token of the UA (previously that case was silently ignored)
  stop = ua_source.index(' ', tail + 1) || ua_source.size
  segment = ua_source[(tail + 1)...stop]

  # accept plain integers only, e.g. "90" but not "90)" or "beta"
  segment == segment.to_i.to_s ? "#{version}.#{segment}" : version
end
139
205
 
140
- if (ua_version = ua_data.userAgent)
141
- event.set(@prefixed_major, ua_version.major.dup.force_encoding(Encoding::UTF_8)) if ua_version.major
142
- event.set(@prefixed_minor, ua_version.minor.dup.force_encoding(Encoding::UTF_8)) if ua_version.minor
143
- event.set(@prefixed_patch, ua_version.patch.dup.force_encoding(Encoding::UTF_8)) if ua_version.patch
144
- event.set(@prefixed_build, ua_version.patchMinor.dup.force_encoding(Encoding::UTF_8)) if ua_version.patchMinor
206
# Reconstructs the OS version string from the parsed OS parts.
#
# NOTE: UA regexes don't always give us the versions back
# they do get "corrected" for various OSes such as:
# - Windows (Windows NT 6.0 => 'Vista')
# - Windows ('Windows NT 6.3' => '8','1')
# - Windows ('Windows NT 10.0' => '10')
# - iOS ('Darwin/15.5' => '9','3','2')
#
# @param os the parsed OS exposing major, minor, patch and patchMinor
# @return [String, nil] the version string, or nil when no major part is known
def build_os_version(os)
  major = os.major
  return unless major

  version = duped_string(major)
  minor = os.minor
  return version unless minor

  # a numeric major is joined with a dot, a named one (e.g. 'Vista') with a space
  version << (major.to_i.to_s == major ? '.' : ' ') << minor
  # patchMinor only counts when patch is present as well
  [os.patch, os.patchMinor].take_while { |segment| segment }.each { |segment| version << '.' << segment }
  version
end
231
+
232
+ def duped_string(str) # returns a copy of str tagged as UTF-8; str itself is not modified
233
+ # Calls in here use #dup because there's potential for later filters to modify these values
234
+ # and corrupt the cache (presumably the parser's result cache - confirm). See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
235
+ str.dup.force_encoding(Encoding::UTF_8) # force_encoding only re-tags the copy's bytes, it does not transcode
236
+ end
237
+
147
238
  end
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
22
22
 
23
23
  # Gem dependencies
24
24
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
25
+ s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3'
25
26
  s.add_development_dependency 'logstash-devutils'
26
27
  end
27
28