logstash-filter-useragent 3.2.2-java → 3.3.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +28 -0
- data/LICENSE +199 -10
- data/README.md +1 -1
- data/docs/index.asciidoc +95 -11
- data/lib/logstash/filters/useragent.rb +146 -55
- data/logstash-filter-useragent.gemspec +1 -0
- data/spec/filters/useragent_spec.rb +429 -69
- data/vendor/jar-dependencies/org/logstash/filters/logstash-filter-useragent/{3.2.2/logstash-filter-useragent-3.2.2.jar → 3.3.2/logstash-filter-useragent-3.3.2.jar} +0 -0
- data/version +1 -1
- metadata +18 -5
@@ -1,10 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
require "java"
|
3
2
|
require "logstash-filter-useragent_jars"
|
4
3
|
require "logstash/filters/base"
|
5
4
|
require "logstash/namespace"
|
6
|
-
require
|
7
|
-
require "thread"
|
5
|
+
require 'logstash/plugin_mixins/ecs_compatibility_support'
|
8
6
|
|
9
7
|
# Parse user agent strings into structured data based on BrowserScope data
|
10
8
|
#
|
@@ -16,6 +14,8 @@ require "thread"
|
|
16
14
|
# <https://github.com/tobie/ua-parser/>.
|
17
15
|
class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
18
16
|
|
17
|
+
include LogStash::PluginMixins::ECSCompatibilitySupport(:disabled, :v1, :v8 => :v1)
|
18
|
+
|
19
19
|
config_name "useragent"
|
20
20
|
|
21
21
|
# The field containing the user agent string. If this field is an
|
@@ -25,7 +25,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
25
25
|
# The name of the field to assign user agent data into.
|
26
26
|
#
|
27
27
|
# If not specified user agent data will be stored in the root of the event.
|
28
|
-
config :target, :validate => :string
|
28
|
+
config :target, :validate => :string # default [user_agent] in ECS mode
|
29
29
|
|
30
30
|
# `regexes.yaml` file to use
|
31
31
|
#
|
@@ -37,7 +37,7 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
37
37
|
config :regexes, :validate => :string
|
38
38
|
|
39
39
|
# A string to prepend to all of the extracted keys
|
40
|
-
config :prefix, :validate => :string, :default => ''
|
40
|
+
config :prefix, :validate => :string, :default => '' # not supported in ECS mode
|
41
41
|
|
42
42
|
# UA parsing is surprisingly expensive. This filter uses an LRU cache to take advantage of the fact that
|
43
43
|
# user agents are often found adjacent to one another in log files and rarely have a random distribution.
|
@@ -55,7 +55,46 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
55
55
|
# number of cache misses and waste memory.
|
56
56
|
config :lru_cache_size, :validate => :number, :default => 100_000
|
57
57
|
|
58
|
+
# Builds the filter and pre-computes, once, every event field reference that
# the filter later writes to. The names depend on the ECS compatibility mode,
# the configured `target` and (legacy mode only) the configured `prefix`.
#
# @param params constructor arguments, forwarded untouched to the superclass
def initialize(*params)
  super

  # make target in the format [field name] if defined, i.e. surrounded by brackets;
  # without an explicit target: event root in legacy mode, [user_agent] in ECS mode
  target = @target || ecs_select[disabled: '', v1: '[user_agent]']
  already_bracketed = target =~ /^\[[^\[\]]+\]$/
  target = "[#{target}]" unless target.empty? || already_bracketed

  # nests a relative field reference under the resolved target
  under_target = ->(relative) { "#{target}#{relative}" }

  @name_field = under_target.call(ecs_select[disabled: "[#{@prefix}name]", v1: '[name]'])
  @device_name_field = under_target.call(ecs_select[disabled: "[#{@prefix}device]", v1: '[device][name]'])

  # in ECS mode the individual version segments are kept in @metadata (not under target)
  @version_field = under_target.call(ecs_select[disabled: "[#{@prefix}version]", v1: '[version]'])
  @major_field = ecs_select[disabled: "#{target}[#{@prefix}major]", v1: "[@metadata][filter][user_agent][version][major]"]
  @minor_field = ecs_select[disabled: "#{target}[#{@prefix}minor]", v1: "[@metadata][filter][user_agent][version][minor]"]
  @patch_field = ecs_select[disabled: "#{target}[#{@prefix}patch]", v1: "[@metadata][filter][user_agent][version][patch]"]

  # os_full did not exist in legacy mode prior to ECS-ification
  @os_full_name_field = under_target.call(ecs_select[disabled: "[#{@prefix}os_full]", v1: '[os][full]'])
  @os_name_field = under_target.call(ecs_select[disabled: "[#{@prefix}os_name]", v1: '[os][name]'])
  # legacy-only duplicate of [os_name]; nil in ECS mode, where it is not written
  @legacy_os_field = ecs_select[disabled: "#{target}[#{@prefix}os]", v1: nil]

  @os_version_field = under_target.call(ecs_select[disabled: "[#{@prefix}os_version]", v1: '[os][version]'])
  @os_major_field = ecs_select[disabled: "#{target}[#{@prefix}os_major]", v1: "[@metadata][filter][user_agent][os][version][major]"]
  @os_minor_field = ecs_select[disabled: "#{target}[#{@prefix}os_minor]", v1: "[@metadata][filter][user_agent][os][version][minor]"]
  @os_patch_field = ecs_select[disabled: "#{target}[#{@prefix}os_patch]", v1: "[@metadata][filter][user_agent][os][version][patch]"]

  # NOTE: unfortunately we can not reliably provide `user_agent.original` since the patterns do not
  # reliably give back the matched group and they support the UA string prefixed and/or suffixed
end
|
93
|
+
|
58
94
|
def register
|
95
|
+
if ecs_compatibility != :disabled && @prefix && !@prefix.empty?
|
96
|
+
@logger.warn "Field prefix isn't supported in ECS compatibility mode, please remove `prefix => #{@prefix.inspect}`"
|
97
|
+
end
|
59
98
|
|
60
99
|
if @regexes.nil?
|
61
100
|
@parser = org.logstash.uaparser.CachingParser.new(lru_cache_size)
|
@@ -63,21 +102,6 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
63
102
|
@logger.debug("Using user agent regexes", :regexes => @regexes)
|
64
103
|
@parser = org.logstash.uaparser.CachingParser.new(@regexes, lru_cache_size)
|
65
104
|
end
|
66
|
-
|
67
|
-
# make @target in the format [field name] if defined, i.e. surrounded by brakets
|
68
|
-
normalized_target = (@target && @target !~ /^\[[^\[\]]+\]$/) ? "[#{@target}]" : ""
|
69
|
-
|
70
|
-
# predefine prefixed field names
|
71
|
-
@prefixed_name = "#{normalized_target}[#{@prefix}name]"
|
72
|
-
@prefixed_os = "#{normalized_target}[#{@prefix}os]"
|
73
|
-
@prefixed_os_name = "#{normalized_target}[#{@prefix}os_name]"
|
74
|
-
@prefixed_os_major = "#{normalized_target}[#{@prefix}os_major]"
|
75
|
-
@prefixed_os_minor = "#{normalized_target}[#{@prefix}os_minor]"
|
76
|
-
@prefixed_device = "#{normalized_target}[#{@prefix}device]"
|
77
|
-
@prefixed_major = "#{normalized_target}[#{@prefix}major]"
|
78
|
-
@prefixed_minor = "#{normalized_target}[#{@prefix}minor]"
|
79
|
-
@prefixed_patch = "#{normalized_target}[#{@prefix}patch]"
|
80
|
-
@prefixed_build = "#{normalized_target}[#{@prefix}build]"
|
81
105
|
end
|
82
106
|
|
83
107
|
def filter(event)
|
@@ -88,60 +112,127 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
88
112
|
|
89
113
|
begin
|
90
114
|
ua_data = lookup_useragent(useragent)
|
91
|
-
rescue
|
92
|
-
@logger.error("
|
115
|
+
rescue => e
|
116
|
+
@logger.error("Unknown error while parsing user agent data",
|
117
|
+
:exception => e.class, :message => e.message, :backtrace => e.backtrace,
|
118
|
+
:field => @source, :event => event.to_hash)
|
93
119
|
return
|
94
120
|
end
|
95
121
|
|
96
122
|
return unless ua_data
|
97
123
|
|
98
124
|
event.remove(@source) if @target == @source
|
99
|
-
set_fields(event, ua_data)
|
125
|
+
set_fields(event, useragent, ua_data)
|
100
126
|
|
101
127
|
filter_matched(event)
|
102
128
|
end
|
103
129
|
|
104
|
-
|
105
|
-
# TODO: (colin) the related specs should be refactored to not rely on private methods.
|
106
|
-
def lookup_useragent(useragent)
|
107
|
-
return unless useragent
|
130
|
+
private
|
108
131
|
|
109
|
-
|
110
|
-
# caused by the underlying JRuby regex code that is not thread safe.
|
111
|
-
# see https://github.com/logstash-plugins/logstash-filter-useragent/issues/25
|
132
|
+
# Hands the raw user agent string to the configured parser.
#
# @param useragent the user agent string to parse
# @return whatever `@parser.parse` returns for that string
def lookup_useragent(useragent)
  @parser.parse(useragent)
end
|
114
135
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
event.set(@
|
121
|
-
|
122
|
-
|
136
|
+
# Copies the parsed user agent data onto the event, using the field
# references pre-computed in the constructor.
#
# @param event the event to decorate (must respond to `set`)
# @param ua_source [String] the original user agent string, used to complete the version
# @param ua_data parsed result exposing `userAgent`, `device` and `os`
def set_fields(event, ua_source, ua_data)
  # UserAgentParser strings are US-ASCII

  ua = ua_data.userAgent
  event.set(@name_field, duped_string(ua.family))

  device = ua_data.device
  event.set(@device_name_field, duped_string(device)) if device

  # individual version segments are only written when the parser produced them
  [[@major_field, ua.major], [@minor_field, ua.minor], [@patch_field, ua.patch]].each do |field, segment|
    event.set(field, duped_string(segment)) if segment
  end
  set_version(event, ua_source, ua) # UA version string e.g. "89.0.4389.90"

  os = ua_data.os
  return unless os

  # os.major, os.minor, ... are all strings
  [[@os_major_field, os.major], # e.g. 'Vista' or '10'
   [@os_minor_field, os.minor],
   [@os_patch_field, os.patch]].each do |field, segment|
    event.set(field, duped_string(segment)) if segment
  end

  os_version = build_os_version(os)
  event.set(@os_version_field, os_version) if os_version

  os_family = os.family
  return unless os_family

  os_name = duped_string(os_family)
  event.set(@os_name_field, os_name)
  # every field gets its own String instance
  event.set(@legacy_os_field, os_name.dup) if @legacy_os_field
  # full name is "<name> <version>", or just the name when no version is known
  event.set(@os_full_name_field, [os_name, os_version].compact.join(' '))
end
|
123
168
|
|
124
|
-
|
125
|
-
|
126
|
-
if
|
127
|
-
#
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
169
|
+
# Reconstructs the User-Agent version string (dot-joined segments) and sets
# it on the event. Nothing is written when no version field is configured or
# the parser produced no major version.
#
# @param event the event to decorate (must respond to `set`)
# @param ua_source [String] the original user agent string
# @param ua parsed user agent exposing `major`, `minor`, `patch`, `patchMinor`
def set_version(event, ua_source, ua)
  return unless @version_field && ua.major

  # only Chrome has all 4 segments, while Firefox only uses major.minor
  version = duped_string(ua.major)

  # a segment only counts when every segment before it is present
  trailing = [ua.minor, ua.patch, ua.patchMinor].take_while { |segment| segment }
  trailing.each { |segment| version << '.' << segment }

  # minor and patch matched but patchMinor did not: try to recover it from the raw string
  if trailing.size == 2
    version = check_and_adjust_version(ua_source, version) || version
  end

  event.set(@version_field, version)
end
|
137
189
|
|
138
|
-
|
190
|
+
# Tries to complete a reconstructed "major.minor.patch" version with a fourth
# segment that is present in the raw user agent string but was not returned
# by the parser.
#
# The extra segment is appended only when the text following `version` in
# `ua_source` is a dot plus a canonical integer, terminated by a space or by
# the end of the string (e.g. a UA ending in "OPR/75.0.3969.149").
#
# @param ua_source [String] the original user agent string
# @param version [String, nil] the version reconstructed so far
# @return [String, nil] the completed version, the unchanged `version` when
#   there is nothing to append, or nil when `version` is nil or does not occur
#   verbatim in `ua_source` (i.e. the version was 'interpreted')
def check_and_adjust_version(ua_source, version)
  # only adjust the version if it's not 'interpreted' (contained in UA string)
  return nil if !version || (i = ua_source.index(version)).nil?
  i += version.size
  # complete version when patchMinor is not matched but still there
  if ua_source[i] == '.' # we built the version with dots
    # the segment runs up to the next space, or to the end of the UA string
    stop = ua_source.index(' ', i + 1) || ua_source.size
    patch_minor = ua_source[i + 1...stop]
    # accept plain integers only (rejects '', '4b', '04', '4.5', ...)
    if patch_minor.eql? patch_minor.to_i.to_s
      version = "#{version}.#{patch_minor}"
    end
  end
  version
end
|
139
205
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
206
|
+
# Reconstructs the OS version string from the parsed OS segments.
#
# NOTE: UA regexes don't always give us the versions back;
# they do get "corrected" for various OSes such as:
# - Windows (Windows NT 6.0 => 'Vista')
# - Windows ('Windows NT 6.3' => '8','1')
# - Windows ('Windows NT 10.0' => '10')
# - iOS ('Darwin/15.5' => '9','3','2')
#
# @param os parsed OS exposing `major`, `minor`, `patch`, `patchMinor`
# @return [String, nil] the version string, or nil when there is no major segment
def build_os_version(os)
  major = os.major
  return unless major

  # a major that round-trips through to_i (e.g. '10') is followed by '.',
  # anything else (e.g. 'Vista') by a space
  first_separator = major.to_i.to_s == major ? '.' : ' '
  version = duped_string(major)

  # a segment only counts when every segment before it is present
  present = [os.minor, os.patch, os.patchMinor].take_while { |segment| segment }
  present.each_with_index do |segment, position|
    version << (position.zero? ? first_separator : '.') << segment
  end
  version
end
|
231
|
+
|
232
|
+
# Returns a private, UTF-8 tagged copy of the given string.
#
# Calls in here use #dup because there's potential for later filters to modify these values
# and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
#
# @param str [String] the string to copy
# @return [String] a new String with the same bytes, marked as UTF-8
def duped_string(str)
  copy = str.dup
  copy.force_encoding(Encoding::UTF_8)
end
|
237
|
+
|
147
238
|
end
|
@@ -22,6 +22,7 @@ Gem::Specification.new do |s|
|
|
22
22
|
|
23
23
|
# Gem dependencies
|
24
24
|
s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
|
25
|
+
s.add_runtime_dependency 'logstash-mixin-ecs_compatibility_support', '~> 1.3'
|
25
26
|
s.add_development_dependency 'logstash-devutils'
|
26
27
|
end
|
27
28
|
|