logstash-filter-useragent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/useragent.rb +108 -0
- data/logstash-filter-useragent.gemspec +27 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/useragent_spec.rb +43 -0
- metadata +89 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
Njk1YTQ3YTUyMmM2NzU1YjkwMDAyODM0MjNlYjZmMTIzMWRlMWU1Nw==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MTJhODcyNzc3ZDFkNzdhODI3NWJkODBkYTBiNTNkNDAzZDNjOGQ0NA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YWIwMjUzNTM4MTNhNTYxMmUzOWY5MDhiYjE3OGQ2MzQ4NTdkY2FkMzE2MTk0
|
10
|
+
YTg2OWJlM2U0MDliNGRlZWMyM2ZiZDVmMWE3MjEyMTQxMTdkY2E0MmEyNDY3
|
11
|
+
OGQ1NGZjMjkwNGRiMDU0MGQxNzRjZTU1NmNkZTVhZDgwMjE0N2M=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZTlkODFlMTgzZGIzZDgxZmE4ZDgxYTUxMjQxMDEwYjZjNTIzOWU5ZTZmZmI1
|
14
|
+
MmVjOGQ1OWVhMTdmNjViMTUyZWFkZGU3MGE3MzNmN2Q0MTI1MDM4M2QwMGQ5
|
15
|
+
MmE2ZDBmM2U1OWE5Nzc2ZDg4MzA4NWMyNDIxNGJmNTUyY2I0ZmY=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,108 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "tempfile"
|
5
|
+
|
6
|
+
# Parse user agent strings into structured data based on BrowserScope data
|
7
|
+
#
|
8
|
+
# UserAgent filter, adds information about user agent like family, operating
|
9
|
+
# system, version, and device
|
10
|
+
#
|
11
|
+
# Logstash releases ship with the regexes.yaml database made available from
|
12
|
+
# ua-parser with an Apache 2.0 license. For more details on ua-parser, see
|
13
|
+
# <https://github.com/tobie/ua-parser/>.
|
14
|
+
class LogStash::Filters::UserAgent < LogStash::Filters::Base
  config_name "useragent"
  milestone 3

  # The field containing the user agent string. If this field is an
  # array, only the first value will be used.
  config :source, :validate => :string, :required => true

  # The name of the field to assign user agent data into.
  #
  # If not specified user agent data will be stored in the root of the event.
  config :target, :validate => :string

  # regexes.yaml file to use
  #
  # If not specified, this will default to the regexes.yaml that ships
  # with logstash.
  #
  # You can find the latest version of this here:
  # <https://github.com/tobie/ua-parser/blob/master/regexes.yaml>
  config :regexes, :validate => :string

  # A string to prepend to all of the extracted keys
  config :prefix, :validate => :string, :default => ''

  public
  # Creates the UserAgentParser instance stored in @parser.
  #
  # Without a `regexes` setting the parser is first built with its own
  # defaults; if that raises, the regexes.yaml located in the vendor/
  # directory relative to this file is tried. With `regexes` set, that
  # file is used directly.
  #
  # Raises RuntimeError when neither the defaults nor the vendored file
  # can be loaded.
  def register
    # Required at registration time rather than at file load.
    require 'user_agent_parser'

    if @regexes.nil?
      begin
        @parser = UserAgentParser::Parser.new
      rescue StandardError => e
        # StandardError (not Exception) so signals and interpreter exits
        # are never swallowed by the fallback.
        @logger.debug("Default user agent regexes unavailable, trying vendored copy", :exception => e)
        path = ::File.expand_path('../../../vendor/regexes.yaml', ::File.dirname(__FILE__))
        begin
          @parser = UserAgentParser::Parser.new(:patterns_path => path)
        rescue StandardError => ex
          raise "Failed to load user agent regexes from #{path}, due to: #{ex}"
        end
      end
    else
      @logger.info("Using user agent regexes", :regexes => @regexes)
      @parser = UserAgentParser::Parser.new(:patterns_path => @regexes)
    end
  end #def register

  public
  # Parses the user agent string found in event[@source] and writes the
  # extracted fields (each key prefixed with @prefix) either into the root
  # of the event or into event[@target].
  #
  # Parser errors are logged and leave the event unmodified.
  def filter(event)
    return unless filter?(event)
    ua_data = nil

    useragent = event[@source]
    useragent = useragent.first if useragent.is_a?(Array)

    begin
      ua_data = @parser.parse(useragent)
    rescue StandardError => e
      @logger.error("Unknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
    end

    # Parsing failed (already logged above): nothing to add to the event.
    return if ua_data.nil?

    # Default: write to the root of the event.
    target = @target.nil? ? event : (event[@target] ||= {})

    # UserAgentParser outputs as US-ASCII, so every value is re-tagged as UTF-8.
    target[@prefix + "name"] = ua_data.name.force_encoding(Encoding::UTF_8)

    # OSX, Android and maybe iOS parse correctly; ua-parser does not provide
    # this level of detail for Windows.
    os = ua_data.os
    unless os.nil?
      target[@prefix + "os"] = os.to_s.force_encoding(Encoding::UTF_8)
      target[@prefix + "os_name"] = os.name.to_s.force_encoding(Encoding::UTF_8)
      unless os.version.nil?
        target[@prefix + "os_major"] = os.version.major.to_s.force_encoding(Encoding::UTF_8)
        target[@prefix + "os_minor"] = os.version.minor.to_s.force_encoding(Encoding::UTF_8)
      end
    end

    target[@prefix + "device"] = ua_data.device.to_s.force_encoding(Encoding::UTF_8) unless ua_data.device.nil?

    ua_version = ua_data.version
    unless ua_version.nil?
      # Only version components the parser actually returned are stored.
      target[@prefix + "major"] = ua_version.major.force_encoding(Encoding::UTF_8) if ua_version.major
      target[@prefix + "minor"] = ua_version.minor.force_encoding(Encoding::UTF_8) if ua_version.minor
      target[@prefix + "patch"] = ua_version.patch.force_encoding(Encoding::UTF_8) if ua_version.patch
      target[@prefix + "build"] = ua_version.patch_minor.force_encoding(Encoding::UTF_8) if ua_version.patch_minor
    end

    filter_matched(event)
  end # def filter
end # class LogStash::Filters::UserAgent
|
108
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Gem specification for the logstash-filter-useragent plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-useragent'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Parse user agent strings into structured data based on BrowserScope data"
  s.description     = "UserAgent filter, adds information about user agent like family, operating system, version, and device"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # `git ls-files` prints one path per line. Splitting on "\n" keeps paths
  # containing spaces intact; the former split($\) fell back to whitespace
  # splitting whenever $\ was left at its default of nil.
  s.files = `git ls-files`.split("\n")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'user_agent_parser', ['>= 2.0.0']
end
|
27
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "gem_publisher"
|
2
|
+
|
3
|
+
desc "Publish gem to RubyGems.org"
# Publishes the gem described by the first *.gemspec found in the parent
# directory of this rake file, via GemPublisher.publish_if_updated.
task :publish_gem do |t|
  gemspec_dir = File.expand_path('..', File.dirname(__FILE__))
  gemspec_path = Dir.glob(File.join(gemspec_dir, '*.gemspec')).first
  # Without this guard a nil path would be handed to the publisher.
  fail "No .gemspec file found in #{gemspec_dir}" if gemspec_path.nil?

  # Named `published` rather than `gem` to avoid shadowing Kernel#gem.
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "uri"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
# Builds a relative path underneath the "vendor" directory.
#
# args - path components appended after "vendor".
#
# Returns the joined path as a String.
def vendor(*args)
  File.join("vendor", *args)
end
|
8
|
+
|
9
|
+
# Rake directory task for "vendor/", declared with the "vendor" task as its
# prerequisite. The action creates the directory named by the task.
directory "vendor/" => ["vendor"] do |task, _args|
  mkdir task.name
end
|
12
|
+
|
13
|
+
# Downloads url to output and verifies the SHA1 of what was received.
#
# url    - location to download from.
# sha1   - expected hex SHA1 digest of the downloaded content.
# output - path the download is written to.
#
# Raises RuntimeError when the digest reported by `download` differs from
# the expected one.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  actual_sha1 = download(url, output)

  return if actual_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
end # def fetch
|
22
|
+
|
23
|
+
# Ensures the file named by the last path segment of url exists under
# vendor/ with the expected SHA1, downloading it when it is missing or its
# digest differs.
#
# A rake task named after the output path (depending on "vendor/") is
# defined and invoked immediately.
#
# url  - location of the file.
# sha1 - expected hex SHA1 digest.
#
# Returns the output path ("vendor/<filename>").
def file_fetch(url, sha1)
  filename = File.basename(URI(url).path)
  output = "vendor/#{filename}"

  task output => ["vendor/"] do
    # Only the digest check is covered by the rescue: an ENOENT raised by
    # the download itself must propagate instead of causing a second
    # download attempt.
    needs_fetch = begin
      file_sha1(output) != sha1
    rescue Errno::ENOENT
      true
    end

    fetch(url, sha1, output) if needs_fetch
  end.invoke

  return output
end
|
39
|
+
|
40
|
+
# Computes the SHA1 digest of the file at path.
#
# The file is read through Digest::SHA1.file, which opens it in binary
# mode, so the digest reflects the exact bytes on disk.
#
# path - file to hash.
#
# Returns the digest as a hex String.
# Raises Errno::ENOENT when the file does not exist.
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
|
54
|
+
|
55
|
+
# Downloads url over HTTP(S) into output and returns the SHA1 of the body.
#
# The body is streamed into "<output>.tmp" and renamed to output once the
# transfer finished. A percentage progress indicator is printed when
# stdout is a TTY and the response carries a content-length.
#
# url    - http or https URL to fetch.
# output - destination path.
#
# Returns the hex SHA1 digest of the received bytes.
# Raises RuntimeError when the response status is not 200; SocketError is
# reported and re-raised.
def download(url, output)
  # Assigned first so the ensure clause never sees an unset tmp.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # Net::HTTP exposes the status code as a String. Redirects are not
      # followed here, so anything other than 200 is treated as a failure.
      fail "HTTP fetch failed for #{url}. #{response}" unless response.code == "200"
      # A missing content-length yields 0, which disables the progress output.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload is written byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # Remove a partial download; after a successful rename tmp no longer exists.
  File.unlink(tmp) if File.exist?(tmp)
end # def download
|
88
|
+
|
89
|
+
# Extracts entries from a gzip-compressed tarball.
#
# The block is called with every tar entry and returns the path the entry
# should be written to, or nil to skip the entry. Files whose size and
# permission bits already match the entry are left untouched. The tarball
# is deleted after all entries were processed.
#
# tarball - path of the .tar.gz file.
# block   - maps a tar entry to its output path (or nil).
def untar(tarball, &block)
  require "zlib"
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # Skip writing when both the size and the permission bits match.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Close the tar reader even when extraction fails part-way.
    tar.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
133
|
+
|
134
|
+
# Decompresses a gzip file next to itself and removes the compressed input.
#
# The output path is the input path without its trailing ".gz". If
# decompression fails, a partially written output file is removed and the
# error is re-raised; the input file is kept in that case.
#
# file - path of the gzip-compressed file.
def ungz(file)
  require "zlib"

  # Strip only the trailing ".gz". Names without that suffix keep the
  # previous behaviour of dropping every ".gz" occurrence.
  outpath = file.end_with?(".gz") ? file[0...-3] : file.gsub('.gz', '')

  File.open(file, "rb") do |io|
    # Raises on a non-gzip input before the output file is touched.
    gz = Zlib::GzipReader.new(io)
    begin
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    rescue
      File.unlink(outpath) if File.file?(outpath)
      raise
    ensure
      # finish releases the reader; the File.open block closes io itself.
      gz.finish
    end
  end

  # The input is removed only after all handles on it are closed.
  File.unlink(file)
end
|
149
|
+
|
150
|
+
desc "Process any vendor files required for this plugin"
# Fetches every entry of @files (hashes with 'url', 'sha1' and an optional
# 'files' list) into vendor/ and unpacks .tar.gz and .gz downloads.
task "vendor" do |_task, _args|

  # @files is not set in this file; an unset value means nothing to vendor.
  Array(@files).each do |file|
    # `fetched` rather than `download` to avoid shadowing the download method.
    fetched = file_fetch(file['url'], file['sha1'])
    if fetched.end_with?('.tar.gz')
      prefix = File.basename(fetched, '.tar.gz')
      untar(fetched) do |entry|
        if file['files'].nil?
          # No filter given: keep the layout of the archive under vendor/.
          # NOTE(review): this case raised TypeError before (File.join with
          # nil) — confirm this is the desired layout.
          out = entry.full_name
        else
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif fetched.end_with?('.gz')
      ungz(fetched)
    end
  end

end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require "spec_helper"
|
4
|
+
require "logstash/filters/useragent"
|
5
|
+
|
6
|
+
# Specs for the useragent filter: one run with a target field, one writing
# into the root of the event.
describe LogStash::Filters::UserAgent do

  describe "defaults" do
    config <<-CONFIG
      filter {
        useragent {
          source => "message"
          target => "ua"
        }
      }
    CONFIG

    sample "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.63 Safari/537.31" do
      insist { subject }.include?("ua")
      insist { subject["ua"]["name"] } == "Chrome"
      insist { subject["ua"]["os"] } == "Linux"
      insist { subject["ua"]["major"] } == "26"
      insist { subject["ua"]["minor"] } == "0"
    end
  end

  describe "Without target field" do
    config <<-CONFIG
      filter {
        useragent {
          source => "message"
        }
      }
    CONFIG

    sample "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.63 Safari/537.31" do
      insist { subject["name"] } == "Chrome"
      insist { subject["os"] } == "Linux"
      insist { subject["major"] } == "26"
      insist { subject["minor"] } == "0"
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-filter-useragent
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elasticsearch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logstash
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.0
|
30
|
+
- - <
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.0
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: user_agent_parser
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ! '>='
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: 2.0.0
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ! '>='
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: 2.0.0
|
47
|
+
description: UserAgent filter, adds information about user agent like family, operating
|
48
|
+
system, version, and device
|
49
|
+
email: richard.pijnenburg@elasticsearch.com
|
50
|
+
executables: []
|
51
|
+
extensions: []
|
52
|
+
extra_rdoc_files: []
|
53
|
+
files:
|
54
|
+
- .gitignore
|
55
|
+
- Gemfile
|
56
|
+
- Rakefile
|
57
|
+
- lib/logstash/filters/useragent.rb
|
58
|
+
- logstash-filter-useragent.gemspec
|
59
|
+
- rakelib/publish.rake
|
60
|
+
- rakelib/vendor.rake
|
61
|
+
- spec/filters/useragent_spec.rb
|
62
|
+
homepage: http://logstash.net/
|
63
|
+
licenses:
|
64
|
+
- Apache License (2.0)
|
65
|
+
metadata:
|
66
|
+
logstash_plugin: 'true'
|
67
|
+
group: filter
|
68
|
+
post_install_message:
|
69
|
+
rdoc_options: []
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ! '>='
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: '0'
|
77
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - ! '>='
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '0'
|
82
|
+
requirements: []
|
83
|
+
rubyforge_project:
|
84
|
+
rubygems_version: 2.4.1
|
85
|
+
signing_key:
|
86
|
+
specification_version: 4
|
87
|
+
summary: Parse user agent strings into structured data based on BrowserScope data
|
88
|
+
test_files:
|
89
|
+
- spec/filters/useragent_spec.rb
|