logstash-filter-useragent 3.2.2-java → 3.3.2-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/LICENSE +199 -10
- data/README.md +1 -1
- data/docs/index.asciidoc +95 -11
- data/lib/logstash/filters/useragent.rb +146 -55
- data/logstash-filter-useragent.gemspec +1 -0
- data/spec/filters/useragent_spec.rb +429 -69
- data/vendor/jar-dependencies/org/logstash/filters/logstash-filter-useragent/{3.2.2/logstash-filter-useragent-3.2.2.jar → 3.3.2/logstash-filter-useragent-3.3.2.jar} +0 -0
- data/version +1 -1
- metadata +18 -5
@@ -1,10 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
require "java"
|
3
2
|
require "logstash-filter-useragent_jars"
|
4
3
|
require "logstash/filters/base"
|
5
4
|
require "logstash/namespace"
|
6
|
-
require
|
7
|
-
require "thread"
|
5
|
+
require 'logstash/plugin_mixins/ecs_compatibility_support'
|
8
6
|
|
9
7
|
# Parse user agent strings into structured data based on BrowserScope data
|
10
8
|
#
|
@@ -16,6 +14,8 @@ require "thread"
|
|
16
14
|
# <https://github.com/tobie/ua-parser/>.
|
17
15
|
class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
18
16
|
|
17
|
+
include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
|
18
|
+
|
19
19
|
config_name "useragent"
|
20
20
|
|
21
21
|
# The field containing the user agent string. If this field is an
|
@@ -25,7 +25,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
25
25
|
# The name of the field to assign user agent data into.
|
26
26
|
#
|
27
27
|
# If not specified user agent data will be stored in the root of the event.
|
28
|
-
config :target, :validate => :string
|
28
|
+
config :target, :validate => :string # default [user_agent] in ECS mode
|
29
29
|
|
30
30
|
# `regexes.yaml` file to use
|
31
31
|
#
|
@@ -37,7 +37,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
37
37
|
config :regexes, :validate => :string
|
38
38
|
|
39
39
|
# A string to prepend to all of the extracted keys
|
40
|
-
config :prefix, :validate => :string, :default => ''
|
40
|
+
config :prefix, :validate => :string, :default => '' # not supported in ECS mode
|
41
41
|
|
42
42
|
# UA parsing is surprisingly expensive. This filter uses an LRU cache to take advantage of the fact that
|
43
43
|
# user agents are often found adjacent to one another in log files and rarely have a random distribution.
|
@@ -55,7 +55,46 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
55
55
|
# number of cache misses and waste memory.
|
56
56
|
config :lru_cache_size, :validate => :number, :default => 100_000
|
57
57
|
|
58
|
+
def initialize(*params)
|
59
|
+
super
|
60
|
+
|
61
|
+
# make @target in the format [field name] if defined, i.e. surrounded by brackets
|
62
|
+
target = @target || ecs_select[disabled: '', v1: '[user_agent]']
|
63
|
+
target = "[#{@target}]" if !target.empty? && target !~ /^\[[^\[\]]+\]$/
|
64
|
+
|
65
|
+
@name_field = ecs_select[disabled: "[#{@prefix}name]", v1: '[name]']
|
66
|
+
@name_field = "#{target}#{@name_field}"
|
67
|
+
|
68
|
+
@device_name_field = ecs_select[disabled: "[#{@prefix}device]", v1: '[device][name]']
|
69
|
+
@device_name_field = "#{target}#{@device_name_field}"
|
70
|
+
|
71
|
+
@version_field = ecs_select[disabled: "[#{@prefix}version]", v1: '[version]']
|
72
|
+
@version_field = "#{target}#{@version_field}"
|
73
|
+
@major_field = ecs_select[disabled: "#{target}[#{@prefix}major]", v1: "[@metadata][filter][user_agent][version][major]"]
|
74
|
+
@minor_field = ecs_select[disabled: "#{target}[#{@prefix}minor]", v1: "[@metadata][filter][user_agent][version][minor]"]
|
75
|
+
@patch_field = ecs_select[disabled: "#{target}[#{@prefix}patch]", v1: "[@metadata][filter][user_agent][version][patch]"]
|
76
|
+
|
77
|
+
@os_full_name_field = ecs_select[disabled: "[#{@prefix}os_full]", v1: '[os][full]'] # did not exist in legacy prior to ECS-ification
|
78
|
+
@os_full_name_field = "#{target}#{@os_full_name_field}"
|
79
|
+
|
80
|
+
@os_name_field = ecs_select[disabled: "[#{@prefix}os_name]", v1: '[os][name]']
|
81
|
+
@os_name_field = "#{target}#{@os_name_field}"
|
82
|
+
@legacy_os_field = ecs_select[disabled: "#{target}[#{@prefix}os]", v1: nil] # same as [os_name] in legacy mode
|
83
|
+
|
84
|
+
@os_version_field = ecs_select[disabled: "[#{@prefix}os_version]", v1: '[os][version]']
|
85
|
+
@os_version_field = "#{target}#{@os_version_field}"
|
86
|
+
@os_major_field = ecs_select[disabled: "#{target}[#{@prefix}os_major]", v1: "[@metadata][filter][user_agent][os][version][major]"]
|
87
|
+
@os_minor_field = ecs_select[disabled: "#{target}[#{@prefix}os_minor]", v1: "[@metadata][filter][user_agent][os][version][minor]"]
|
88
|
+
@os_patch_field = ecs_select[disabled: "#{target}[#{@prefix}os_patch]", v1: "[@metadata][filter][user_agent][os][version][patch]"]
|
89
|
+
|
90
|
+
# NOTE: unfortunately we can not reliably provide `user_agent.original` since the patterns do not
|
91
|
+
# reliably give back the matched group and they also match UA strings that carry a prefix and/or suffix
|
92
|
+
end
|
93
|
+
|
58
94
|
def register
|
95
|
+
if ecs_compatibility != :disabled && @prefix && !@prefix.empty?
|
96
|
+
@logger.warn "Field prefix isn't supported in ECS compatibility mode, please remove `prefix => #{@prefix.inspect}`"
|
97
|
+
end
|
59
98
|
|
60
99
|
if @regexes.nil?
|
61
100
|
@parser = org.logstash.uaparser.CachingParser.new(lru_cache_size)
|
@@ -63,21 +102,6 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
63
102
|
@logger.debug("Using user agent regexes", :regexes => @regexes)
|
64
103
|
@parser = org.logstash.uaparser.CachingParser.new(@regexes, lru_cache_size)
|
65
104
|
end
|
66
|
-
|
67
|
-
# make @target in the format [field name] if defined, i.e. surrounded by brakets
|
68
|
-
normalized_target = (@target && @target !~ /^\[[^\[\]]+\]$/) ? "[#{@target}]" : ""
|
69
|
-
|
70
|
-
# predefine prefixed field names
|
71
|
-
@prefixed_name = "#{normalized_target}[#{@prefix}name]"
|
72
|
-
@prefixed_os = "#{normalized_target}[#{@prefix}os]"
|
73
|
-
@prefixed_os_name = "#{normalized_target}[#{@prefix}os_name]"
|
74
|
-
@prefixed_os_major = "#{normalized_target}[#{@prefix}os_major]"
|
75
|
-
@prefixed_os_minor = "#{normalized_target}[#{@prefix}os_minor]"
|
76
|
-
@prefixed_device = "#{normalized_target}[#{@prefix}device]"
|
77
|
-
@prefixed_major = "#{normalized_target}[#{@prefix}major]"
|
78
|
-
@prefixed_minor = "#{normalized_target}[#{@prefix}minor]"
|
79
|
-
@prefixed_patch = "#{normalized_target}[#{@prefix}patch]"
|
80
|
-
@prefixed_build = "#{normalized_target}[#{@prefix}build]"
|
81
105
|
end
|
82
106
|
|
83
107
|
def filter(event)
|
@@ -88,60 +112,127 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
88
112
|
|
89
113
|
begin
|
90
114
|
ua_data = lookup_useragent(useragent)
|
91
|
-
rescue
|
92
|
-
@logger.error("
|
115
|
+
rescue => e
|
116
|
+
@logger.error("Unknown error while parsing user agent data",
|
117
|
+
:exception => e.class, :message => e.message, :backtrace => e.backtrace,
|
118
|
+
:field => @source, :event => event.to_hash)
|
93
119
|
return
|
94
120
|
end
|
95
121
|
|
96
122
|
return unless ua_data
|
97
123
|
|
98
124
|
event.remove(@source) if @target == @source
|
99
|
-
set_fields(event, ua_data)
|
125
|
+
set_fields(event, useragent, ua_data)
|
100
126
|
|
101
127
|
filter_matched(event)
|
102
128
|
end
|
103
129
|
|
104
|
-
|
105
|
-
# TODO: (colin) the related specs should be refactored to not rely on private methods.
|
106
|
-
def lookup_useragent(useragent)
|
107
|
-
return unless useragent
|
130
|
+
private
|
108
131
|
|
109
|
-
|
110
|
-
# caused by the underlying JRuby regex code that is not thread safe.
|
111
|
-
# see https://github.com/logstash-plugins/logstash-filter-useragent/issues/25
|
132
|
+
def lookup_useragent(useragent)
|
112
133
|
@parser.parse(useragent)
|
113
134
|
end
|
114
135
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
event.set(@
|
121
|
-
|
122
|
-
|
136
|
+
def set_fields(event, ua_source, ua_data)
|
137
|
+
# UserAgentParser strings are US-ASCII
|
138
|
+
|
139
|
+
ua = ua_data.userAgent
|
140
|
+
event.set(@name_field, duped_string(ua.family))
|
141
|
+
event.set(@device_name_field, duped_string(ua_data.device)) if ua_data.device
|
142
|
+
|
143
|
+
event.set(@major_field, duped_string(ua.major)) if ua.major
|
144
|
+
event.set(@minor_field, duped_string(ua.minor)) if ua.minor
|
145
|
+
event.set(@patch_field, duped_string(ua.patch)) if ua.patch
|
146
|
+
set_version(event, ua_source, ua) # UA version string e.g. "89.0.4389.90"
|
147
|
+
|
148
|
+
os = ua_data.os
|
149
|
+
if os
|
150
|
+
# os.major, os.minor, ... are all strings
|
151
|
+
event.set(@os_major_field, duped_string(os.major)) if os.major # e.g. 'Vista' or '10'
|
152
|
+
event.set(@os_minor_field, duped_string(os.minor)) if os.minor
|
153
|
+
event.set(@os_patch_field, duped_string(os.patch)) if os.patch
|
154
|
+
os_version = build_os_version(os)
|
155
|
+
event.set(@os_version_field, os_version) if os_version
|
156
|
+
|
157
|
+
os_name = os.family
|
158
|
+
if os_name
|
159
|
+
os_name = duped_string(os_name)
|
160
|
+
event.set(@os_name_field, os_name)
|
161
|
+
event.set(@legacy_os_field, os_name.dup) if @legacy_os_field
|
162
|
+
os_full_name = os_name.dup
|
163
|
+
os_full_name << ' ' << os_version if os_version
|
164
|
+
event.set(@os_full_name_field, os_full_name)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|
123
168
|
|
124
|
-
|
125
|
-
|
126
|
-
if
|
127
|
-
#
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
169
|
+
# reconstruct and set the User-Agent version string
|
170
|
+
def set_version(event, ua_source, ua)
|
171
|
+
if @version_field && ua.major
|
172
|
+
# only Chrome has all 4 segments, while Firefox only uses major.minor
|
173
|
+
version = duped_string(ua.major)
|
174
|
+
if ua.minor
|
175
|
+
version << '.' << ua.minor
|
176
|
+
if ua.patch
|
177
|
+
version << '.' << ua.patch
|
178
|
+
if ua.patchMinor
|
179
|
+
version << '.' << ua.patchMinor
|
180
|
+
else
|
181
|
+
adjusted_version = check_and_adjust_version(ua_source, version)
|
182
|
+
version = adjusted_version if adjusted_version
|
183
|
+
end
|
184
|
+
end
|
135
185
|
end
|
186
|
+
event.set(@version_field, version)
|
136
187
|
end
|
188
|
+
end
|
137
189
|
|
138
|
-
|
190
|
+
def check_and_adjust_version(ua_source, version)
|
191
|
+
# only adjust the UA version if it's not 'interpreted' (i.e. it is contained verbatim in the UA string)
|
192
|
+
return nil if !version || (i = ua_source.index(version)).nil?
|
193
|
+
i += version.size
|
194
|
+
# complete version when patchMinor is not matched but still there
|
195
|
+
if ua_source[i] == '.' # we built the version with dots
|
196
|
+
if patch_minor = ua_source.index(' ', i + 1)
|
197
|
+
patch_minor = ua_source[i + 1...patch_minor]
|
198
|
+
if patch_minor.eql? patch_minor.to_i.to_s
|
199
|
+
version = "#{version}.#{patch_minor}"
|
200
|
+
end
|
201
|
+
end
|
202
|
+
end
|
203
|
+
version
|
204
|
+
end
|
139
205
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
206
|
+
# reconstructs the OS version string
|
207
|
+
def build_os_version(os)
|
208
|
+
# NOTE: UA regexes don't always give us the versions back
|
209
|
+
# they do get "corrected" for various OSes such as:
|
210
|
+
# - Windows (Windows NT 6.0 => 'Vista')
|
211
|
+
# - Windows ('Windows NT 6.3' => '8','1')
|
212
|
+
# - Windows ('Windows NT 10.0' => '10')
|
213
|
+
# - iOS ('Darwin/15.5' => '9','3','2')
|
214
|
+
return unless major = os.major
|
215
|
+
if major.to_i.to_s == major
|
216
|
+
version, sep = duped_string(major), '.'
|
217
|
+
else
|
218
|
+
version, sep = duped_string(major), ' '
|
145
219
|
end
|
220
|
+
if os.minor
|
221
|
+
version << sep << os.minor
|
222
|
+
if os.patch
|
223
|
+
version << '.' << os.patch
|
224
|
+
if os.patchMinor
|
225
|
+
version << '.' << os.patchMinor
|
226
|
+
end
|
227
|
+
end
|
228
|
+
end
|
229
|
+
version
|
146
230
|
end
|
231
|
+
|
232
|
+
def duped_string(str)
|
233
|
+
# Calls in here use #dup because there's potential for later filters to modify these values
|
234
|
+
# and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
|
235
|
+
str.dup.force_encoding(Encoding::UTF_8)
|
236
|
+
end
|
237
|
+
|
147
238
|
end
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
|
23
23
|
# Gem dependencies
|
24
24
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
25
|
+
s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3'
|
25
26
|
s.add_development_dependency 'logstash-devutils'
|
26
27
|
end
|
27
28
|
|