logstash-filter-useragent 3.2.2-java → 3.3.2-java

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,10 +1,8 @@
1
1
  # encoding: utf-8
2
- require "java"
3
2
  require "logstash-filter-useragent_jars"
4
3
  require "logstash/filters/base"
5
4
  require "logstash/namespace"
6
- require "tempfile"
7
- require "thread"
5
+ require 'logstash/plugin_mixins/ecs_compatibility_support'
8
6
 
9
7
  # Parse user agent strings into structured data based on BrowserScope data
10
8
  #
@@ -16,6 +14,8 @@ require "thread"
16
14
  # <https://github.com/tobie/ua-parser/>.
17
15
  class LogStash::Filters::UserAgent < LogStash::Filters::Base
18
16
 
17
+ include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
18
+
19
19
  config_name "useragent"
20
20
 
21
21
  # The field containing the user agent string. If this field is an
@@ -25,7 +25,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
25
25
  # The name of the field to assign user agent data into.
26
26
  #
27
27
  # If not specified user agent data will be stored in the root of the event.
28
- config :target, :validate => :string
28
+ config :target, :validate => :string # default [user_agent] in ECS mode
29
29
 
30
30
  # `regexes.yaml` file to use
31
31
  #
@@ -37,7 +37,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
37
37
  config :regexes, :validate => :string
38
38
 
39
39
  # A string to prepend to all of the extracted keys
40
- config :prefix, :validate => :string, :default => ''
40
+ config :prefix, :validate => :string, :default => '' # not supported in ECS mode
41
41
 
42
42
  # UA parsing is surprisingly expensive. This filter uses an LRU cache to take advantage of the fact that
43
43
  # user agents are often found adjacent to one another in log files and rarely have a random distribution.
@@ -55,7 +55,46 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
55
55
  # number of cache misses and waste memory.
56
56
  config :lru_cache_size, :validate => :number, :default => 100_000
57
57
 
58
+ def initialize(*params)
59
+ super
60
+
61
+ # make @target in the format [field name] if defined, i.e. surrounded by brackets
62
+ target = @target || ecs_select[disabled: '', v1: '[user_agent]']
63
+ target = "[#{@target}]" if !target.empty? && target !~ /^\[[^\[\]]+\]$/
64
+
65
+ @name_field = ecs_select[disabled: "[#{@prefix}name]", v1: '[name]']
66
+ @name_field = "#{target}#{@name_field}"
67
+
68
+ @device_name_field = ecs_select[disabled: "[#{@prefix}device]", v1: '[device][name]']
69
+ @device_name_field = "#{target}#{@device_name_field}"
70
+
71
+ @version_field = ecs_select[disabled: "[#{@prefix}version]", v1: '[version]']
72
+ @version_field = "#{target}#{@version_field}"
73
+ @major_field = ecs_select[disabled: "#{target}[#{@prefix}major]", v1: "[@metadata][filter][user_agent][version][major]"]
74
+ @minor_field = ecs_select[disabled: "#{target}[#{@prefix}minor]", v1: "[@metadata][filter][user_agent][version][minor]"]
75
+ @patch_field = ecs_select[disabled: "#{target}[#{@prefix}patch]", v1: "[@metadata][filter][user_agent][version][patch]"]
76
+
77
+ @os_full_name_field = ecs_select[disabled: "[#{@prefix}os_full]", v1: '[os][full]'] # did not exist in legacy prior to ECS-ification
78
+ @os_full_name_field = "#{target}#{@os_full_name_field}"
79
+
80
+ @os_name_field = ecs_select[disabled: "[#{@prefix}os_name]", v1: '[os][name]']
81
+ @os_name_field = "#{target}#{@os_name_field}"
82
+ @legacy_os_field = ecs_select[disabled: "#{target}[#{@prefix}os]", v1: nil] # same as [os_name] in legacy mode
83
+
84
+ @os_version_field = ecs_select[disabled: "[#{@prefix}os_version]", v1: '[os][version]']
85
+ @os_version_field = "#{target}#{@os_version_field}"
86
+ @os_major_field = ecs_select[disabled: "#{target}[#{@prefix}os_major]", v1: "[@metadata][filter][user_agent][os][version][major]"]
87
+ @os_minor_field = ecs_select[disabled: "#{target}[#{@prefix}os_minor]", v1: "[@metadata][filter][user_agent][os][version][minor]"]
88
+ @os_patch_field = ecs_select[disabled: "#{target}[#{@prefix}os_patch]", v1: "[@metadata][filter][user_agent][os][version][patch]"]
89
+
90
+ # NOTE: unfortunately we cannot reliably provide `user_agent.original`: the patterns do not always
91
+ # give back the matched group, and they also match when the UA string has a prefix and/or suffix
92
+ end
93
+
58
94
  def register
95
+ if ecs_compatibility != :disabled && @prefix && !@prefix.empty?
96
+ @logger.warn "Field prefix isn't supported in ECS compatibility mode, please remove `prefix => #{@prefix.inspect}`"
97
+ end
59
98
 
60
99
  if @regexes.nil?
61
100
  @parser = org.logstash.uaparser.CachingParser.new(lru_cache_size)
@@ -63,21 +102,6 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
63
102
  @logger.debug("Using user agent regexes", :regexes => @regexes)
64
103
  @parser = org.logstash.uaparser.CachingParser.new(@regexes, lru_cache_size)
65
104
  end
66
-
67
- # make @target in the format [field name] if defined, i.e. surrounded by brakets
68
- normalized_target = (@target && @target !~ /^\[[^\[\]]+\]$/) ? "[#{@target}]" : ""
69
-
70
- # predefine prefixed field names
71
- @prefixed_name = "#{normalized_target}[#{@prefix}name]"
72
- @prefixed_os = "#{normalized_target}[#{@prefix}os]"
73
- @prefixed_os_name = "#{normalized_target}[#{@prefix}os_name]"
74
- @prefixed_os_major = "#{normalized_target}[#{@prefix}os_major]"
75
- @prefixed_os_minor = "#{normalized_target}[#{@prefix}os_minor]"
76
- @prefixed_device = "#{normalized_target}[#{@prefix}device]"
77
- @prefixed_major = "#{normalized_target}[#{@prefix}major]"
78
- @prefixed_minor = "#{normalized_target}[#{@prefix}minor]"
79
- @prefixed_patch = "#{normalized_target}[#{@prefix}patch]"
80
- @prefixed_build = "#{normalized_target}[#{@prefix}build]"
81
105
  end
82
106
 
83
107
  def filter(event)
@@ -88,60 +112,127 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
88
112
 
89
113
  begin
90
114
  ua_data = lookup_useragent(useragent)
91
- rescue StandardError => e
92
- @logger.error("Uknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
115
+ rescue => e
116
+ @logger.error("Unknown error while parsing user agent data",
117
+ :exception => e.class, :message => e.message, :backtrace => e.backtrace,
118
+ :field => @source, :event => event.to_hash)
93
119
  return
94
120
  end
95
121
 
96
122
  return unless ua_data
97
123
 
98
124
  event.remove(@source) if @target == @source
99
- set_fields(event, ua_data)
125
+ set_fields(event, useragent, ua_data)
100
126
 
101
127
  filter_matched(event)
102
128
  end
103
129
 
104
- # should be private but need to stay public for specs
105
- # TODO: (colin) the related specs should be refactored to not rely on private methods.
106
- def lookup_useragent(useragent)
107
- return unless useragent
130
+ private
108
131
 
109
- # the UserAgentParser::Parser class is not thread safe, indications are that it is probably
110
- # caused by the underlying JRuby regex code that is not thread safe.
111
- # see https://github.com/logstash-plugins/logstash-filter-useragent/issues/25
132
+ def lookup_useragent(useragent)
112
133
  @parser.parse(useragent)
113
134
  end
114
135
 
115
- private
116
-
117
- def set_fields(event, ua_data)
118
- # UserAgentParser outputs as US-ASCII.
119
-
120
- event.set(@prefixed_name, ua_data.userAgent.family.dup.force_encoding(Encoding::UTF_8))
121
-
122
- #OSX, Android and maybe iOS parse correctly, ua-agent parsing for Windows does not provide this level of detail
136
+ def set_fields(event, ua_source, ua_data)
137
+ # UserAgentParser strings are US-ASCII
138
+
139
+ ua = ua_data.userAgent
140
+ event.set(@name_field, duped_string(ua.family))
141
+ event.set(@device_name_field, duped_string(ua_data.device)) if ua_data.device
142
+
143
+ event.set(@major_field, duped_string(ua.major)) if ua.major
144
+ event.set(@minor_field, duped_string(ua.minor)) if ua.minor
145
+ event.set(@patch_field, duped_string(ua.patch)) if ua.patch
146
+ set_version(event, ua_source, ua) # UA version string e.g. "89.0.4389.90"
147
+
148
+ os = ua_data.os
149
+ if os
150
+ # os.major, os.minor, ... are all strings
151
+ event.set(@os_major_field, duped_string(os.major)) if os.major # e.g. 'Vista' or '10'
152
+ event.set(@os_minor_field, duped_string(os.minor)) if os.minor
153
+ event.set(@os_patch_field, duped_string(os.patch)) if os.patch
154
+ os_version = build_os_version(os)
155
+ event.set(@os_version_field, os_version) if os_version
156
+
157
+ os_name = os.family
158
+ if os_name
159
+ os_name = duped_string(os_name)
160
+ event.set(@os_name_field, os_name)
161
+ event.set(@legacy_os_field, os_name.dup) if @legacy_os_field
162
+ os_full_name = os_name.dup
163
+ os_full_name << ' ' << os_version if os_version
164
+ event.set(@os_full_name_field, os_full_name)
165
+ end
166
+ end
167
+ end
123
168
 
124
- # Calls in here use #dup because there's potential for later filters to modify these values
125
- # and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
126
- if (os = ua_data.os)
127
- # The OS is a rich object
128
- event.set(@prefixed_os, ua_data.os.family.dup.force_encoding(Encoding::UTF_8))
129
- event.set(@prefixed_os_name, os.family.dup.force_encoding(Encoding::UTF_8)) if os.family
130
-
131
- # These are all strings
132
- if os.minor && os.major
133
- event.set(@prefixed_os_major, os.major.dup.force_encoding(Encoding::UTF_8)) if os.major
134
- event.set(@prefixed_os_minor, os.minor.dup.force_encoding(Encoding::UTF_8)) if os.minor
169
+ # reconstruct and set the User-Agent version string
170
+ def set_version(event, ua_source, ua)
171
+ if @version_field && ua.major
172
+ # only Chrome has all 4 segments, while Firefox only uses major.minor
173
+ version = duped_string(ua.major)
174
+ if ua.minor
175
+ version << '.' << ua.minor
176
+ if ua.patch
177
+ version << '.' << ua.patch
178
+ if ua.patchMinor
179
+ version << '.' << ua.patchMinor
180
+ else
181
+ adjusted_version = check_and_adjust_version(ua_source, version)
182
+ version = adjusted_version if adjusted_version
183
+ end
184
+ end
135
185
  end
186
+ event.set(@version_field, version)
136
187
  end
188
+ end
137
189
 
138
- event.set(@prefixed_device, ua_data.device.to_s.dup.force_encoding(Encoding::UTF_8)) if ua_data.device
190
+ def check_and_adjust_version(ua_source, version)
191
+ # only adjust the version if it's not 'interpreted' (i.e. it is contained verbatim in the UA string)
192
+ return nil if !version || (i = ua_source.index(version)).nil?
193
+ i += version.size
194
+ # append the patchMinor segment when the parser did not capture it but it follows the version in the UA string
195
+ if ua_source[i] == '.' # we built the version with dots
196
+ if patch_minor = ua_source.index(' ', i + 1)
197
+ patch_minor = ua_source[i + 1...patch_minor]
198
+ if patch_minor.eql? patch_minor.to_i.to_s
199
+ version = "#{version}.#{patch_minor}"
200
+ end
201
+ end
202
+ end
203
+ version
204
+ end
139
205
 
140
- if (ua_version = ua_data.userAgent)
141
- event.set(@prefixed_major, ua_version.major.dup.force_encoding(Encoding::UTF_8)) if ua_version.major
142
- event.set(@prefixed_minor, ua_version.minor.dup.force_encoding(Encoding::UTF_8)) if ua_version.minor
143
- event.set(@prefixed_patch, ua_version.patch.dup.force_encoding(Encoding::UTF_8)) if ua_version.patch
144
- event.set(@prefixed_build, ua_version.patchMinor.dup.force_encoding(Encoding::UTF_8)) if ua_version.patchMinor
206
+ # reconstructs the OS version string
207
+ def build_os_version(os)
208
+ # NOTE: UA regexes don't always give us the versions back
209
+ # they do get "corrected" for various OSes such as:
210
+ # - Windows (Windows NT 6.0 => 'Vista')
211
+ # - Windows ('Windows NT 6.3' => '8','1')
212
+ # - Windows ('Windows NT 10.0' => '10')
213
+ # - iOS ('Darwin/15.5' => '9','3','2')
214
+ return unless major = os.major
215
+ if major.to_i.to_s == major
216
+ version, sep = duped_string(major), '.'
217
+ else
218
+ version, sep = duped_string(major), ' '
145
219
  end
220
+ if os.minor
221
+ version << sep << os.minor
222
+ if os.patch
223
+ version << '.' << os.patch
224
+ if os.patchMinor
225
+ version << '.' << os.patchMinor
226
+ end
227
+ end
228
+ end
229
+ version
146
230
  end
231
+
232
+ def duped_string(str)
233
+ # Calls in here use #dup because there's potential for later filters to modify these values
234
+ # and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
235
+ str.dup.force_encoding(Encoding::UTF_8)
236
+ end
237
+
147
238
  end
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
22
22
 
23
23
  # Gem dependencies
24
24
  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
25
+ s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3'
25
26
  s.add_development_dependency 'logstash-devutils'
26
27
  end
27
28