logstash-filter-useragent 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -0
- data/README.md +4 -4
- data/lib/logstash/filters/useragent.rb +66 -28
- data/logstash-filter-useragent.gemspec +3 -2
- data/spec/filters/useragent_spec.rb +47 -0
- metadata +36 -25
- data/.gitignore +0 -4
- data/Rakefile +0 -10
- data/vendor.json +0 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 635e0d9c99d16e89f80e2636ff9fd490df621b4b
|
4
|
+
data.tar.gz: a39b3167347dc9b389048584d6da3feb8b4658c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0667b4eb93b90c48c4466f1b2385907ecdeb21acec0fed7a3adaff3e2dd8d113ad9cfb857fc1957b0592202e9279e35f34d27832331125303369919965a21d37
|
7
|
+
data.tar.gz: 493a091f47bff9eb965af0fc46e58b3936558c61bf2a2f7c5c400225fbc71e7ed833ed75fa05848da941e9514cba7d33080e33a8b1d4bb2b26ab6b33d0910085
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# Logstash Plugin
|
2
2
|
|
3
|
-
This is a plugin for [Logstash](https://github.com/
|
3
|
+
This is a plugin for [Logstash](https://github.com/elastic/logstash).
|
4
4
|
|
5
5
|
It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
|
6
6
|
|
7
7
|
## Documentation
|
8
8
|
|
9
|
-
Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation are placed under one [central location](http://www.
|
9
|
+
Logstash provides infrastructure to automatically generate documentation for this plugin. We use the asciidoc format to write documentation so any comments in the source code will be first converted into asciidoc and then into html. All plugin documentation is placed under one [central location](http://www.elastic.co/guide/en/logstash/current/).
|
10
10
|
|
11
11
|
- For formatting code or config examples, you can use the asciidoc `[source,ruby]` directive
|
12
|
-
- For more asciidoc formatting tips, see the excellent reference here https://github.com/
|
12
|
+
- For more asciidoc formatting tips, see the excellent reference here https://github.com/elastic/docs#asciidoc-guide
|
13
13
|
|
14
14
|
## Need Help?
|
15
15
|
|
@@ -83,4 +83,4 @@ Programming is not a required skill. Whatever you've seen about open source and
|
|
83
83
|
|
84
84
|
It is more important to the community that you are able to contribute.
|
85
85
|
|
86
|
-
For more information about contributing, see the [CONTRIBUTING](https://github.com/
|
86
|
+
For more information about contributing, see the [CONTRIBUTING](https://github.com/elastic/logstash/blob/master/CONTRIBUTING.md) file.
|
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require "logstash/filters/base"
|
3
3
|
require "logstash/namespace"
|
4
|
+
require "lru_redux"
|
4
5
|
require "tempfile"
|
5
6
|
|
6
7
|
# Parse user agent strings into structured data based on BrowserScope data
|
@@ -12,6 +13,8 @@ require "tempfile"
|
|
12
13
|
# ua-parser with an Apache 2.0 license. For more details on ua-parser, see
|
13
14
|
# <https://github.com/tobie/ua-parser/>.
|
14
15
|
class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
16
|
+
LOOKUP_CACHE = LruRedux::ThreadSafeCache.new(1000)
|
17
|
+
|
15
18
|
config_name "useragent"
|
16
19
|
|
17
20
|
# The field containing the user agent string. If this field is an
|
@@ -35,6 +38,22 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
35
38
|
# A string to prepend to all of the extracted keys
|
36
39
|
config :prefix, :validate => :string, :default => ''
|
37
40
|
|
41
|
+
# UA parsing is surprisingly expensive. This filter uses an LRU cache to take advantage of the fact that
|
42
|
+
# user agents are often found adjacent to one another in log files and rarely have a random distribution.
|
43
|
+
# The higher you set this the more likely an item is to be in the cache and the faster this filter will run.
|
44
|
+
# However, if you set this too high you can use more memory than desired.
|
45
|
+
#
|
46
|
+
# Experiment with different values for this option to find the best performance for your dataset.
|
47
|
+
#
|
48
|
+
# This MUST be set to a value > 0. There is really no reason to not want this behavior; the overhead is minimal
|
49
|
+
# and the speed gains are large.
|
50
|
+
#
|
51
|
+
# It is important to note that this config value is global. That is to say all instances of the user agent filter
|
52
|
+
# share the same cache. The last declared cache size will 'win'. The reason for this is that there would be no benefit
|
53
|
+
# to having multiple caches for different instances at different points in the pipeline; that would just increase the
|
54
|
+
# number of cache misses and waste memory.
|
55
|
+
config :lru_cache_size, :validate => :number, :default => 1000
|
56
|
+
|
38
57
|
public
|
39
58
|
def register
|
40
59
|
require 'user_agent_parser'
|
@@ -53,55 +72,74 @@ class LogStash::Filters::UserAgent < LogStash::Filters::Base
|
|
53
72
|
@logger.info("Using user agent regexes", :regexes => @regexes)
|
54
73
|
@parser = UserAgentParser::Parser.new(:patterns_path => @regexes)
|
55
74
|
end
|
75
|
+
|
76
|
+
LOOKUP_CACHE.max_size = @lru_cache_size
|
56
77
|
end #def register
|
57
78
|
|
58
79
|
public
|
59
80
|
def filter(event)
|
60
81
|
return unless filter?(event)
|
61
|
-
ua_data = nil
|
62
82
|
|
63
83
|
useragent = event[@source]
|
64
84
|
useragent = useragent.first if useragent.is_a? Array
|
65
85
|
|
66
86
|
begin
|
67
|
-
ua_data =
|
68
|
-
rescue
|
87
|
+
ua_data = lookup_useragent(useragent)
|
88
|
+
rescue StandardError => e
|
69
89
|
@logger.error("Uknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
|
90
|
+
return
|
70
91
|
end
|
71
92
|
|
72
|
-
|
73
|
-
if @target.nil?
|
74
|
-
# default write to the root of the event
|
75
|
-
target = event
|
76
|
-
else
|
77
|
-
target = event[@target] ||= {}
|
78
|
-
end
|
93
|
+
return unless ua_data
|
79
94
|
|
80
|
-
|
95
|
+
target = @target.nil? ? event : (event[@target] ||= {})
|
96
|
+
write_to_target(target, ua_data)
|
81
97
|
|
82
|
-
|
98
|
+
filter_matched(event)
|
99
|
+
end # def filter
|
83
100
|
|
84
|
-
|
85
|
-
|
86
|
-
target[@prefix + "os"] = ua_data.os.to_s.force_encoding(Encoding::UTF_8)
|
87
|
-
target[@prefix + "os_name"] = ua_data.os.name.to_s.force_encoding(Encoding::UTF_8)
|
88
|
-
target[@prefix + "os_major"] = ua_data.os.version.major.to_s.force_encoding(Encoding::UTF_8) unless ua_data.os.version.nil?
|
89
|
-
target[@prefix + "os_minor"] = ua_data.os.version.minor.to_s.force_encoding(Encoding::UTF_8) unless ua_data.os.version.nil?
|
90
|
-
end
|
101
|
+
def lookup_useragent(useragent)
|
102
|
+
return unless useragent
|
91
103
|
|
92
|
-
|
104
|
+
cached = LOOKUP_CACHE[useragent]
|
105
|
+
return cached if cached
|
93
106
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
107
|
+
ua_data = @parser.parse(useragent)
|
108
|
+
|
109
|
+
LOOKUP_CACHE[useragent] = ua_data
|
110
|
+
ua_data
|
111
|
+
end
|
112
|
+
|
113
|
+
def write_to_target(target, ua_data)
|
114
|
+
# UserAgentParser outputs as US-ASCII.
|
115
|
+
|
116
|
+
target[@prefix + "name"] = ua_data.name.dup.force_encoding(Encoding::UTF_8)
|
117
|
+
|
118
|
+
#OSX, Android and maybe iOS parse correctly; ua-agent parsing for Windows does not provide this level of detail
|
119
|
+
|
120
|
+
# Calls in here use #dup because there's potential for later filters to modify these values
|
121
|
+
# and corrupt the cache. See uap source here for details https://github.com/ua-parser/uap-ruby/tree/master/lib/user_agent_parser
|
122
|
+
if (os = ua_data.os)
|
123
|
+
# The OS is a rich object
|
124
|
+
target[@prefix + "os"] = ua_data.os.to_s.dup.force_encoding(Encoding::UTF_8)
|
125
|
+
target[@prefix + "os_name"] = os.name.dup.force_encoding(Encoding::UTF_8) if os.name
|
126
|
+
|
127
|
+
# These are all strings
|
128
|
+
if (os_version = os.version)
|
129
|
+
target[@prefix + "os_major"] = os_version.major.dup.force_encoding(Encoding::UTF_8) if os_version.major
|
130
|
+
target[@prefix + "os_minor"] = os_version.minor.dup.force_encoding(Encoding::UTF_8) if os_version.minor
|
100
131
|
end
|
132
|
+
end
|
101
133
|
|
102
|
-
|
134
|
+
target[@prefix + "device"] = ua_data.device.to_s.dup.force_encoding(Encoding::UTF_8) if ua_data.device
|
135
|
+
|
136
|
+
if (ua_version = ua_data.version)
|
137
|
+
target[@prefix + "major"] = ua_version.major.dup.force_encoding(Encoding::UTF_8) if ua_version.major
|
138
|
+
target[@prefix + "minor"] = ua_version.minor.dup.force_encoding(Encoding::UTF_8) if ua_version.minor
|
139
|
+
target[@prefix + "patch"] = ua_version.patch.dup.force_encoding(Encoding::UTF_8) if ua_version.patch
|
140
|
+
target[@prefix + "build"] = ua_version.patch_minor.dup.force_encoding(Encoding::UTF_8) if ua_version.patch_minor
|
103
141
|
end
|
142
|
+
end
|
104
143
|
|
105
|
-
end # def filter
|
106
144
|
end # class LogStash::Filters::UserAgent
|
107
145
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-useragent'
|
4
|
-
s.version = '1.0
|
4
|
+
s.version = '1.1.0'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "Parse user agent strings into structured data based on BrowserScope data"
|
7
7
|
s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
|
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.require_paths = ["lib"]
|
12
12
|
|
13
13
|
# Files
|
14
|
-
s.files =
|
14
|
+
s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
|
15
15
|
|
16
16
|
# Tests
|
17
17
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
@@ -23,6 +23,7 @@ Gem::Specification.new do |s|
|
|
23
23
|
s.add_runtime_dependency "logstash-core", '>= 1.4.0', '< 2.0.0'
|
24
24
|
|
25
25
|
s.add_runtime_dependency 'user_agent_parser', ['>= 2.0.0']
|
26
|
+
s.add_runtime_dependency 'lru_redux', "~> 1.1.0"
|
26
27
|
s.add_development_dependency 'logstash-devutils'
|
27
28
|
end
|
28
29
|
|
@@ -40,4 +40,51 @@ describe LogStash::Filters::UserAgent do
|
|
40
40
|
insist { subject["minor"] } == "0"
|
41
41
|
end
|
42
42
|
end
|
43
|
+
|
44
|
+
describe "LRU object identity" do
|
45
|
+
let(:uafilter) { LogStash::Filters::UserAgent.new("source" => "foo") }
|
46
|
+
let(:ua_data) {
|
47
|
+
uafilter.lookup_useragent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36")
|
48
|
+
}
|
49
|
+
subject(:target) { {} }
|
50
|
+
|
51
|
+
before do
|
52
|
+
uafilter.register
|
53
|
+
|
54
|
+
# Stub this out because this UA doesn't have this field
|
55
|
+
allow(ua_data.version).to receive(:patch_minor).and_return("foo")
|
56
|
+
|
57
|
+
uafilter.write_to_target(target, ua_data)
|
58
|
+
end
|
59
|
+
|
60
|
+
{
|
61
|
+
"name" => lambda {|uad| uad.name},
|
62
|
+
"os" => lambda {|uad| uad.os.to_s},
|
63
|
+
"os_name" => lambda {|uad| uad.os.name},
|
64
|
+
"os_major" => lambda {|uad| uad.os.version.major},
|
65
|
+
"os_minor" => lambda {|uad| uad.os.version.minor},
|
66
|
+
"device" => lambda {|uad| uad.device.to_s},
|
67
|
+
"major" => lambda {|uad| uad.version.major},
|
68
|
+
"minor" => lambda {|uad| uad.version.minor},
|
69
|
+
"patch" => lambda {|uad| uad.version.patch},
|
70
|
+
"build" => lambda {|uad| uad.version.patch_minor}
|
71
|
+
}.each do |field, uad_getter|
|
72
|
+
context "for the #{field} field" do
|
73
|
+
let(:value) {uad_getter.call(ua_data)}
|
74
|
+
let(:target_field) { target[field]}
|
75
|
+
|
76
|
+
it "should not have a nil value" do
|
77
|
+
expect(target_field).to be_truthy
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should have equivalent values" do
|
81
|
+
expect(target_field).to eql(value)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should dup/clone the field to prevent cache corruption" do
|
85
|
+
expect(target_field.object_id).not_to eql(value.object_id)
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
43
90
|
end
|
metadata
CHANGED
@@ -1,82 +1,92 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-useragent
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 1.4.0
|
19
|
+
- - "<"
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 2.0.0
|
14
22
|
name: logstash-core
|
23
|
+
prerelease: false
|
24
|
+
type: :runtime
|
15
25
|
version_requirements: !ruby/object:Gem::Requirement
|
16
26
|
requirements:
|
17
|
-
- -
|
27
|
+
- - ">="
|
18
28
|
- !ruby/object:Gem::Version
|
19
29
|
version: 1.4.0
|
20
|
-
- - <
|
30
|
+
- - "<"
|
21
31
|
- !ruby/object:Gem::Version
|
22
32
|
version: 2.0.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
23
34
|
requirement: !ruby/object:Gem::Requirement
|
24
35
|
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
version: 1.4.0
|
28
|
-
- - <
|
36
|
+
- - ">="
|
29
37
|
- !ruby/object:Gem::Version
|
30
38
|
version: 2.0.0
|
39
|
+
name: user_agent_parser
|
31
40
|
prerelease: false
|
32
41
|
type: :runtime
|
33
|
-
- !ruby/object:Gem::Dependency
|
34
|
-
name: user_agent_parser
|
35
42
|
version_requirements: !ruby/object:Gem::Requirement
|
36
43
|
requirements:
|
37
|
-
- -
|
44
|
+
- - ">="
|
38
45
|
- !ruby/object:Gem::Version
|
39
46
|
version: 2.0.0
|
47
|
+
- !ruby/object:Gem::Dependency
|
40
48
|
requirement: !ruby/object:Gem::Requirement
|
41
49
|
requirements:
|
42
|
-
- -
|
50
|
+
- - "~>"
|
43
51
|
- !ruby/object:Gem::Version
|
44
|
-
version:
|
52
|
+
version: 1.1.0
|
53
|
+
name: lru_redux
|
45
54
|
prerelease: false
|
46
55
|
type: :runtime
|
47
|
-
- !ruby/object:Gem::Dependency
|
48
|
-
name: logstash-devutils
|
49
56
|
version_requirements: !ruby/object:Gem::Requirement
|
50
57
|
requirements:
|
51
|
-
- -
|
58
|
+
- - "~>"
|
52
59
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
60
|
+
version: 1.1.0
|
61
|
+
- !ruby/object:Gem::Dependency
|
54
62
|
requirement: !ruby/object:Gem::Requirement
|
55
63
|
requirements:
|
56
|
-
- -
|
64
|
+
- - ">="
|
57
65
|
- !ruby/object:Gem::Version
|
58
66
|
version: '0'
|
67
|
+
name: logstash-devutils
|
59
68
|
prerelease: false
|
60
69
|
type: :development
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '0'
|
61
75
|
description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
|
62
76
|
email: info@elastic.co
|
63
77
|
executables: []
|
64
78
|
extensions: []
|
65
79
|
extra_rdoc_files: []
|
66
80
|
files:
|
67
|
-
- .gitignore
|
68
81
|
- CHANGELOG.md
|
69
82
|
- CONTRIBUTORS
|
70
83
|
- Gemfile
|
71
84
|
- LICENSE
|
72
85
|
- NOTICE.TXT
|
73
86
|
- README.md
|
74
|
-
- Rakefile
|
75
87
|
- lib/logstash/filters/useragent.rb
|
76
88
|
- logstash-filter-useragent.gemspec
|
77
89
|
- spec/filters/useragent_spec.rb
|
78
|
-
- vendor.json
|
79
|
-
- vendor/regexes.yaml
|
80
90
|
homepage: http://www.elastic.co/guide/en/logstash/current/index.html
|
81
91
|
licenses:
|
82
92
|
- Apache License (2.0)
|
@@ -89,19 +99,20 @@ require_paths:
|
|
89
99
|
- lib
|
90
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
91
101
|
requirements:
|
92
|
-
- -
|
102
|
+
- - ">="
|
93
103
|
- !ruby/object:Gem::Version
|
94
104
|
version: '0'
|
95
105
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
96
106
|
requirements:
|
97
|
-
- -
|
107
|
+
- - ">="
|
98
108
|
- !ruby/object:Gem::Version
|
99
109
|
version: '0'
|
100
110
|
requirements: []
|
101
111
|
rubyforge_project:
|
102
|
-
rubygems_version: 2.
|
112
|
+
rubygems_version: 2.4.8
|
103
113
|
signing_key:
|
104
114
|
specification_version: 4
|
105
115
|
summary: Parse user agent strings into structured data based on BrowserScope data
|
106
116
|
test_files:
|
107
117
|
- spec/filters/useragent_spec.rb
|
118
|
+
has_rdoc:
|
data/.gitignore
DELETED
data/Rakefile
DELETED
data/vendor.json
DELETED