logstash-filter-useragent 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ Njk1YTQ3YTUyMmM2NzU1YjkwMDAyODM0MjNlYjZmMTIzMWRlMWU1Nw==
5
+ data.tar.gz: !binary |-
6
+ MTJhODcyNzc3ZDFkNzdhODI3NWJkODBkYTBiNTNkNDAzZDNjOGQ0NA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YWIwMjUzNTM4MTNhNTYxMmUzOWY5MDhiYjE3OGQ2MzQ4NTdkY2FkMzE2MTk0
10
+ YTg2OWJlM2U0MDliNGRlZWMyM2ZiZDVmMWE3MjEyMTQxMTdkY2E0MmEyNDY3
11
+ OGQ1NGZjMjkwNGRiMDU0MGQxNzRjZTU1NmNkZTVhZDgwMjE0N2M=
12
+ data.tar.gz: !binary |-
13
+ ZTlkODFlMTgzZGIzZDgxZmE4ZDgxYTUxMjQxMDEwYjZjNTIzOWU5ZTZmZmI1
14
+ MmVjOGQ1OWVhMTdmNjViMTUyZWFkZGU3MGE3MzNmN2Q0MTI1MDM4M2QwMGQ5
15
+ MmE2ZDBmM2U1OWE5Nzc2ZDg4MzA4NWMyNDIxNGJmNTUyY2I0ZmY=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Fetch gems over TLS; the plain-http source allowed downloads to be
# intercepted or modified in transit.
source 'https://rubygems.org'

# Build tooling: rake drives the tasks, gem_publisher is required by the
# publish task, and archive-tar-minitar is required when unpacking tarballs.
gem 'rake'
gem 'gem_publisher'
gem 'archive-tar-minitar'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Files the "vendor" task downloads. Each entry names the URL to fetch and
# the SHA1 digest the downloaded file is expected to have.
@files = [
  {
    'url' => 'https://raw.githubusercontent.com/tobie/ua-parser/master/regexes.yaml',
    'sha1' => '608202fb817f9c0f728b8bbdf9eb279ba7661dd8'
  }
]

# Running rake without a task name just shells out to `rake -T`.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,108 @@
1
+ # encoding: utf-8
2
+ require "logstash/filters/base"
3
+ require "logstash/namespace"
4
+ require "tempfile"
5
+
6
# Parse user agent strings into structured data based on BrowserScope data
#
# UserAgent filter, adds information about user agent like family, operating
# system, version, and device
#
# Logstash releases ship with the regexes.yaml database made available from
# ua-parser with an Apache 2.0 license. For more details on ua-parser, see
# <https://github.com/tobie/ua-parser/>.
class LogStash::Filters::UserAgent < LogStash::Filters::Base
  config_name "useragent"
  milestone 3

  # The field containing the user agent string. If this field is an
  # array, only the first value will be used.
  config :source, :validate => :string, :required => true

  # The name of the field to assign user agent data into.
  #
  # If not specified user agent data will be stored in the root of the event.
  config :target, :validate => :string

  # regexes.yaml file to use
  #
  # If not specified, this will default to the regexes.yaml that ships
  # with logstash.
  #
  # You can find the latest version of this here:
  # <https://github.com/tobie/ua-parser/blob/master/regexes.yaml>
  config :regexes, :validate => :string

  # A string to prepend to all of the extracted keys
  config :prefix, :validate => :string, :default => ''

  public
  # Builds @parser. With no `regexes` setting the parser's own default
  # patterns are tried first, then the vendor/regexes.yaml file located
  # relative to this source file. With `regexes` set, that file is used.
  #
  # Raises RuntimeError when neither default source can be loaded.
  def register
    require 'user_agent_parser'
    if @regexes.nil?
      begin
        @parser = UserAgentParser::Parser.new()
      rescue StandardError
        # Only ordinary errors trigger the fallback; signals and exit requests
        # (which `rescue Exception` used to swallow) now propagate.
        begin
          path = ::File.expand_path('../../../vendor/regexes.yaml', ::File.dirname(__FILE__))
          @parser = UserAgentParser::Parser.new(:patterns_path => path)
        rescue => ex
          raise "Failed to cache, due to: #{ex}\n"
        end
      end
    else
      @logger.info("Using user agent regexes", :regexes => @regexes)
      @parser = UserAgentParser::Parser.new(:patterns_path => @regexes)
    end
  end #def register

  public
  # Parses the user agent found in the `source` field and writes the result
  # either into the `target` field or, when no target is configured, into the
  # root of the event. Every key is prefixed with `prefix`.
  #
  # A parse error is logged and leaves the event untouched.
  def filter(event)
    return unless filter?(event)
    ua_data = nil

    useragent = event[@source]
    useragent = useragent.first if useragent.is_a? Array

    begin
      ua_data = @parser.parse(useragent)
    rescue StandardError => e
      @logger.error("Unknown error while parsing user agent data", :exception => e, :field => @source, :event => event)
    end

    return if ua_data.nil?

    if @target.nil?
      # default write to the root of the event
      target = event
    else
      target = event[@target] ||= {}
    end

    # UserAgentParser outputs as US-ASCII.
    target[@prefix + "name"] = utf8(ua_data.name)

    #OSX, Android and maybe iOS parse correctly, ua-agent parsing for Windows does not provide this level of detail
    unless ua_data.os.nil?
      target[@prefix + "os"] = utf8(ua_data.os)
      target[@prefix + "os_name"] = utf8(ua_data.os.name)
      unless ua_data.os.version.nil?
        target[@prefix + "os_major"] = utf8(ua_data.os.version.major)
        target[@prefix + "os_minor"] = utf8(ua_data.os.version.minor)
      end
    end

    target[@prefix + "device"] = utf8(ua_data.device) unless ua_data.device.nil?

    unless ua_data.version.nil?
      ua_version = ua_data.version
      target[@prefix + "major"] = utf8(ua_version.major) if ua_version.major
      target[@prefix + "minor"] = utf8(ua_version.minor) if ua_version.minor
      target[@prefix + "patch"] = utf8(ua_version.patch) if ua_version.patch
      target[@prefix + "build"] = utf8(ua_version.patch_minor) if ua_version.patch_minor
    end

    filter_matched(event)
  end # def filter

  private
  # Returns a UTF-8 tagged copy of value's string form. Working on a copy
  # keeps force_encoding from mutating strings that belong to the parser and
  # from raising when such a string is frozen.
  def utf8(value)
    value.to_s.dup.force_encoding(Encoding::UTF_8)
  end
end # class LogStash::Filters::UserAgent
108
+
@@ -0,0 +1,27 @@
1
# Gem specification for the logstash useragent filter plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-useragent'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Parse user agent strings into structured data based on BrowserScope data"
  s.description     = "UserAgent filter, adds information about user agent like family, operating system, version, and device"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # Split the listing on the input record separator ($/, newline by default).
  # The previous `$\` is the *output* record separator, which is nil unless
  # set, so split fell back to whitespace and broke paths containing spaces.
  s.files = `git ls-files`.split($/)

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'user_agent_parser', ['>= 2.0.0']
end
27
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  # The gemspec is looked up one directory above the one holding this file;
  # the first match of the glob is used.
  spec_pattern = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  spec_path = Dir.glob(spec_pattern).first

  # Report only when publish_if_updated returns something truthy.
  published = GemPublisher.publish_if_updated(spec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Joins the given path segments underneath the "vendor" directory and
# returns the resulting relative path.
def vendor(*segments)
  File.join("vendor", *segments)
end
8
+
9
# Directory task for "vendor/" (declared with the "vendor" task as its
# prerequisite); its action creates the directory named by the task.
directory("vendor/" => ["vendor"]) { |dir_task, _args| mkdir dir_task.name }
12
+
13
# Downloads url to output and verifies the digest of what was received.
#
# url    - address to download
# sha1   - expected SHA1 hex digest
# output - destination path
#
# Raises RuntimeError when the downloaded digest differs from sha1.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  received_sha1 = download(url, output)
  return if received_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{received_sha1}')"
end # def fetch
22
+
23
# Ensures vendor/<basename of url> exists with the expected checksum,
# downloading it when it is missing or its SHA1 differs.
#
# url  - address of the file
# sha1 - expected SHA1 hex digest
#
# Defines a rake task named after the output path (depending on "vendor/")
# and invokes it immediately. Returns the output path.
def file_fetch(url, sha1)
  filename = File.basename(URI(url).path)
  output = "vendor/#{filename}"
  task(output => ["vendor/"]) do
    # The rescue covers only the checksum of the existing file. Previously it
    # also wrapped fetch, so an ENOENT raised while downloading caused the
    # whole download to be attempted a second time.
    stale =
      begin
        file_sha1(output) != sha1
      rescue Errno::ENOENT
        true
      end
    fetch(url, sha1, output) if stale
  end.invoke

  return output
end
39
+
40
# Returns the SHA1 hex digest of the file at path, reading it in 16 KiB
# chunks. Errors from opening the file (e.g. Errno::ENOENT) propagate.
def file_sha1(path)
  File.open(path, "r") do |io|
    sha = Digest::SHA1.new
    while (chunk = io.read(16384))
      sha << chunk
    end
    sha.hexdigest
  end
end
54
+
55
# Downloads url into output and returns the SHA1 hex digest of the bytes
# received.
#
# url    - http or https address to fetch
# output - destination path; data is first written to "<output>.tmp" and
#          renamed into place once the transfer has finished
#
# Raises RuntimeError when the server does not answer 200. SocketError is
# reported on stdout and re-raised. The temporary file is removed on every
# exit path.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string, which uri.path would drop.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String. The old check compared it against Integers
      # and was inverted, so it could never reject a failed request.
      fail "HTTP fetch failed for #{url}. #{response}" unless response.code == "200"
      # A missing Content-Length header yields 0.0, which disables progress output.
      size = response["content-length"].to_f
      count = 0
      # Binary mode: the payload must reach disk byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries from a gzip-compressed tarball.
#
# tarball - path of the .tar.gz file; it is deleted after a successful run
# block   - called with each archive entry; returns the destination path for
#           that entry, or nil to skip it
#
# Files whose size and permission bits already match the archive entry are
# not rewritten.
def untar(tarball, &block)
  require "archive/tar/minitar"
  tgz = Zlib::GzipReader.new(File.open(tarball))
  # Pull out typesdb
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? replaces File.exists?, which Ruby 3.2 removed.
        if File.exist?(path)
          stat = File.stat(path)
          # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
          # expose headers in the entry.
          entry_size = entry.instance_eval { @size }
          # If file sizes and modes are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        File.open(path, "w") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
          #IO.copy_stream(entry, fd)
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    # Close the archive reader even when extraction raises part-way through.
    tar.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses a gzip file next to itself and removes the compressed source.
#
# file - path ending in ".gz"; the output path is the same path without
#        that suffix
#
# Raises ArgumentError when file does not end in ".gz" (decompressing onto
# the input path would destroy it). On any failure the partial output is
# removed and the error re-raised.
def ungz(file)
  require "zlib"

  # Strip only the trailing extension. gsub('.gz', '') also rewrote any
  # ".gz" appearing earlier in the path, e.g. in a directory name.
  outpath = file.sub(/\.gz\z/, '')
  fail ArgumentError, "#{file} does not end in .gz" if outpath == file

  begin
    # The block form closes the reader before the source is unlinked and
    # also when copying fails.
    Zlib::GzipReader.open(file) do |gz|
      File.open(outpath, "wb") do |out|
        IO::copy_stream(gz, out)
      end
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  end
end
149
+
150
desc "Process any vendor files required for this plugin"
# For every entry in @files: fetch it into vendor/, then unpack it when it is
# a .tar.gz archive or a plain .gz file. An entry may carry a 'files' list
# naming which archive members to extract.
task "vendor" do |task, args|

  @files.each do |file|
    download = file_fetch(file['url'], file['sha1'])
    # Match on the real suffix; the old /.tar.gz/ and /.gz/ patterns were
    # unanchored and their dots matched any character.
    if download.end_with?('.tar.gz')
      prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
      untar(download) do |entry|
        wanted = file['files']
        next unless wanted.nil? || wanted.include?(entry.full_name.gsub(prefix, ''))
        # Always derive the output name. Previously it was assigned only when
        # a 'files' list was present, so File.join received nil otherwise.
        File.join('vendor', entry.full_name.split("/").last)
      end
    elsif download.end_with?('.gz')
      ungz(download)
    end
  end

end
@@ -0,0 +1,43 @@
1
+ # encoding: utf-8
2
+
3
+ require "spec_helper"
4
+ require "logstash/filters/useragent"
5
+
6
describe LogStash::Filters::UserAgent do
  # Chrome 26 on Linux; both scenarios feed this same string to the filter.
  chrome_on_linux = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.63 Safari/537.31"

  # Parsed fields are expected under the configured "ua" target.
  describe "defaults" do
    config <<-CONFIG
      filter {
        useragent {
          source => "message"
          target => "ua"
        }
      }
    CONFIG

    sample chrome_on_linux do
      insist { subject }.include?("ua")
      insist { subject["ua"]["name"] } == "Chrome"
      insist { subject["ua"]["os"] } == "Linux"
      insist { subject["ua"]["major"] } == "26"
      insist { subject["ua"]["minor"] } == "0"
    end
  end

  # With no target configured the fields are expected at the event root.
  describe "Without target field" do
    config <<-CONFIG
      filter {
        useragent {
          source => "message"
        }
      }
    CONFIG

    sample chrome_on_linux do
      insist { subject["name"] } == "Chrome"
      insist { subject["os"] } == "Linux"
      insist { subject["major"] } == "26"
      insist { subject["minor"] } == "0"
    end
  end
end
metadata ADDED
@@ -0,0 +1,89 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-filter-useragent
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: user_agent_parser
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: 2.0.0
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: 2.0.0
47
+ description: UserAgent filter, adds information about user agent like family, operating
48
+ system, version, and device
49
+ email: richard.pijnenburg@elasticsearch.com
50
+ executables: []
51
+ extensions: []
52
+ extra_rdoc_files: []
53
+ files:
54
+ - .gitignore
55
+ - Gemfile
56
+ - Rakefile
57
+ - lib/logstash/filters/useragent.rb
58
+ - logstash-filter-useragent.gemspec
59
+ - rakelib/publish.rake
60
+ - rakelib/vendor.rake
61
+ - spec/filters/useragent_spec.rb
62
+ homepage: http://logstash.net/
63
+ licenses:
64
+ - Apache License (2.0)
65
+ metadata:
66
+ logstash_plugin: 'true'
67
+ group: filter
68
+ post_install_message:
69
+ rdoc_options: []
70
+ require_paths:
71
+ - lib
72
+ required_ruby_version: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ requirements: []
83
+ rubyforge_project:
84
+ rubygems_version: 2.4.1
85
+ signing_key:
86
+ specification_version: 4
87
+ summary: Parse user agent strings into structured data based on BrowserScope data
88
+ test_files:
89
+ - spec/filters/useragent_spec.rb