logstash-filter-split 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +4 -0
- data/Gemfile +4 -0
- data/Rakefile +6 -0
- data/lib/logstash/filters/split.rb +62 -0
- data/logstash-filter-split.gemspec +26 -0
- data/rakelib/publish.rake +9 -0
- data/rakelib/vendor.rake +169 -0
- data/spec/filters/split_spec.rb +59 -0
- metadata +74 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NTczYTg0Nzk1ZTcwYzQ5OWU2MjEyOTY4NzdlMjJmY2NmNWRiNjU0Zg==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MzE0ZGQzMDIwZTFmNmJjZTQwMzI1ZmVjYzVhNjE2Y2M1ZTcyOTViMQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
NWUyYTQ3MDhjMmEzMGJjM2I0ZTlkMTUyYWI0MGU3N2U4ZjQzNzNhNTUwNzZm
|
10
|
+
YTk3NDVmYjQ0NTZkMDViYjcxYTIwODFlNzFhOWUzNTk0YTZkMTA5OWUwNzll
|
11
|
+
YmMwZDFkMWIwZDJjOWJhMWY1NGEyNWQ2ZTU0OWYxZGUzMGJjYmY=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
ZGVhY2I4NTM0YTY2MTNlOWYzMzBlNTM5NTMzMzllZDI2NWE5NmVmZWM5MjVj
|
14
|
+
N2FmN2M0OGYzZTg0YmM4MjY5YTBiNjc3NGNlNjJkZjE1ZDZlMGFkYWZjNjUy
|
15
|
+
YzY3NDUwMWNhYjljMTc5YTdjNTMyMjBiYjAyMTE2NDA1OTM1ZTk=
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Rakefile
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
|
5
|
+
# The split filter is for splitting multiline messages into separate events.
|
6
|
+
#
|
7
|
+
# An example use case of this filter is for taking output from the 'exec' input
|
8
|
+
# which emits one event for the whole output of a command and splitting that
|
9
|
+
# output by newline - making each line an event.
|
10
|
+
#
|
11
|
+
# The end result of each split is a complete copy of the event
|
12
|
+
# with only the current split section of the given field changed.
|
13
|
+
class LogStash::Filters::Split < LogStash::Filters::Base

  config_name "split"
  milestone 2

  # The string to split on. This is usually a line terminator, but can be any
  # string.
  config :terminator, :validate => :string, :default => "\n"

  # The field whose value is split by the terminator.
  config :field, :validate => :string, :default => "message"

  public
  def register
    # Nothing to do
  end # def register

  public
  # Splits the configured field of +event+ on the terminator and yields one
  # clone of the event per non-empty piece, with only that field replaced.
  # The original event is cancelled once the clones have been yielded.
  #
  # The event is left untouched (and nothing is yielded) when:
  # * filter?(event) is false (helper not defined in this file; presumably
  #   inherited from LogStash::Filters::Base),
  # * the field is missing or does not hold a String, or
  # * the terminator does not occur in the value.
  def filter(event)
    return unless filter?(event)

    original_value = event[@field]

    # If for some reason the field is an array of values, take the first only.
    original_value = original_value.first if original_value.is_a?(Array)

    # A missing (nil) or non-String value cannot be split. Pass the event
    # through unchanged instead of raising NoMethodError on #split.
    return unless original_value.is_a?(String)

    # Using -1 for 'limit' on String#split makes ruby not drop trailing empty
    # splits.
    splits = original_value.split(@terminator, -1)

    # Skip filtering if splitting this event resulted in only one thing found.
    return if splits.length == 1

    splits.each do |value|
      # Empty pieces (e.g. from consecutive terminators) produce no event.
      next if value.empty?

      event_split = event.clone
      @logger.debug("Split event", :value => value, :field => @field)
      event_split[@field] = value
      filter_matched(event_split)

      # Push this new event onto the stack at the LogStash::FilterWorker
      yield event_split
    end

    # Cancel this event, we'll use the newly generated ones above.
    event.cancel
  end # def filter
end # class LogStash::Filters::Split
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Gem specification for the logstash-filter-split plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-filter-split'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "The split filter is for splitting multiline messages into separate events."
  s.description     = "The split filter is for splitting multiline messages into separate events."
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths = ["lib"]

  # Files
  # `git ls-files` prints one path per line, so split on newlines. Splitting on
  # $\ (the output record separator, nil by default) makes String#split fall
  # back to whitespace splitting, which breaks any path containing a space.
  s.files = `git ls-files`.split("\n")

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "filter" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

end
|
26
|
+
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require "gem_publisher"
|
2
|
+
|
3
|
+
# Publishes the gem described by the first *.gemspec found in the parent of
# this file's directory, using GemPublisher.publish_if_updated. A message is
# printed only when that call returns a truthy value (presumably the name of
# the gem that was pushed; confirm against the gem_publisher documentation).
desc "Publish gem to RubyGems.org"
task :publish_gem do
  rakelib_dir = File.dirname(__FILE__)
  gemspec_path = Dir.glob(File.expand_path('../*.gemspec', rakelib_dir)).first
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
|
9
|
+
|
data/rakelib/vendor.rake
ADDED
@@ -0,0 +1,169 @@
|
|
1
|
+
require "net/http"
|
2
|
+
require "uri"
|
3
|
+
require "digest/sha1"
|
4
|
+
|
5
|
+
# Builds a path underneath the "vendor" directory.
#
# args - zero or more path components to append after "vendor".
#
# Returns the joined path as a String ("vendor" itself when no components
# are given).
def vendor(*args)
  File.join("vendor", *args)
end
|
8
|
+
|
9
|
+
# Directory task for "vendor/", declared with the "vendor" task as its
# prerequisite. The body creates the directory named by the task itself.
directory "vendor/" => ["vendor"] do |dir_task|
  mkdir dir_task.name
end
|
12
|
+
|
13
|
+
# Downloads +url+ to +output+ and checks the SHA1 of what was downloaded.
#
# url    - address to download from.
# sha1   - expected hex SHA1 digest of the payload.
# output - path the payload is written to.
#
# Raises RuntimeError (via fail) when the digest returned by download differs
# from +sha1+.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  downloaded_sha1 = download(url, output)
  return if downloaded_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{downloaded_sha1}')"
end # def fetch
|
22
|
+
|
23
|
+
# Ensures the file at +url+ is present under vendor/ with the expected SHA1.
#
# A rake task named after the output path is defined (depending on the
# "vendor/" directory task) and invoked immediately. The file is downloaded
# when it is missing or when its current SHA1 differs from +sha1+.
#
# url  - address of the file; the basename of its path is the local file name.
# sha1 - expected hex SHA1 digest.
#
# Returns the local path, "vendor/<filename>".
def file_fetch(url, sha1)
  filename = File.basename(URI(url).path)
  output = "vendor/#{filename}"

  fetch_task = task output => ["vendor/"] do
    # Only the checksum read is guarded. An Errno::ENOENT raised by the
    # download itself must propagate instead of silently triggering a second
    # download attempt.
    cached_sha1 = begin
      file_sha1(output)
    rescue Errno::ENOENT
      nil # file not downloaded yet
    end

    fetch(url, sha1, output) if cached_sha1.nil? || cached_sha1 != sha1
  end
  fetch_task.invoke

  output
end
|
39
|
+
|
40
|
+
# Computes the SHA1 digest of the file at +path+.
#
# Digest::SHA1.file reads the file in binary mode, so the digest is taken
# over the file's raw bytes.
#
# Returns the digest as a lowercase hex String.
# Raises Errno::ENOENT when +path+ does not exist.
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
|
54
|
+
|
55
|
+
# Downloads +url+ to +output+ over HTTP(S), hashing the payload as it streams.
#
# The body is first written to "<output>.tmp" and renamed to +output+ only
# after the transfer finished, so a failed download never leaves a partial
# file at +output+. The temporary file is removed on every exit path.
#
# url    - http:// or https:// address to fetch.
# output - destination path.
#
# Returns the hex SHA1 digest of the downloaded bytes.
# Raises RuntimeError (via fail) when the response status is not 200 or 301,
# and re-raises SocketError after printing a message.
def download(url, output)
  uri = URI(url)
  digest = Digest::SHA1.new
  tmp = "#{output}.tmp"
  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string (uri.path alone drops it) and is "/"
    # for an empty path.
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String such as "200", so it must be compared with
      # strings, and the download must fail when the status is NOT accepted.
      # 301 stays in the accepted list as in the original; redirects are not
      # followed here.
      fail "HTTP fetch failed for #{url}. #{response}" unless %w[200 301].include?(response.code)

      # 0.0 when the header is absent, which disables the progress display.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode: the payload must be stored byte-for-byte.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  # tmp is nil when URI(url) itself raised; do not mask that error.
  File.unlink(tmp) if tmp && File.exist?(tmp)
end # def download
|
88
|
+
|
89
|
+
# Extracts entries from a gzip-compressed tarball.
#
# tarball - path to the .tar.gz file.
# block   - called with each archive entry; returns the destination path for
#           that entry, or nil to skip it.
#
# Missing parent directories are created. A regular file is skipped when a
# file of the same size and permission bits already exists at the destination.
# After all entries were processed the tarball itself is deleted; if
# extraction raises, the tarball is kept and the tar reader is still closed.
def untar(tarball, &block)
  require "archive/tar/minitar"
  require "zlib"
  tgz = Zlib::GzipReader.new(File.open(tarball))
  tar = Archive::Tar::Minitar::Input.open(tgz)
  begin
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?
      parent = File.dirname(path)

      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
      else
        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry.
        entry_mode = entry.instance_eval { @mode } & 0777
        # File.exist? rather than File.exists?, which was removed in Ruby 3.2.
        if File.exist?(path)
          stat = File.stat(path)
          entry_size = entry.instance_eval { @size }
          # If size and permission bits are the same, skip writing.
          next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
        end
        puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
        # Binary mode: archive entries must be written byte-for-byte.
        File.open(path, "wb") do |fd|
          # eof? check lets us skip empty files. Necessary because the API provided by
          # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
          # IO object. Something about empty files in this EntryStream causes
          # IO.copy_stream to throw "can't convert nil into String" on JRuby
          # TODO(sissel): File a bug about this.
          while !entry.eof?
            chunk = entry.read(16384)
            fd.write(chunk)
          end
        end
        File.chmod(entry_mode, path)
      end
    end
  ensure
    tar.close
  end
  File.unlink(tarball) if File.file?(tarball)
end # def untar
|
133
|
+
|
134
|
+
# Decompresses a gzip file next to itself and removes the compressed original.
#
# file - path ending in ".gz"; the output path is +file+ without that suffix.
#
# If decompression or removal of the source fails, the output file is deleted
# and the error is re-raised. The gzip reader is closed on every path.
#
# Raises ArgumentError when +file+ does not end in ".gz", because the output
# path would then equal the input and the input would be truncated.
def ungz(file)
  require "zlib"

  # Strip only the trailing suffix; gsub('.gz', '') would also remove ".gz"
  # from the middle of a name such as "data.gzipped.gz".
  outpath = file.sub(/\.gz\z/, '')
  raise ArgumentError, "ungz expects a path ending in .gz, got #{file.inspect}" if outpath == file

  tgz = Zlib::GzipReader.open(file)
  begin
    File.open(outpath, "wb") do |out|
      IO.copy_stream(tgz, out)
    end
    File.unlink(file)
  rescue
    File.unlink(outpath) if File.file?(outpath)
    raise
  ensure
    tgz.close
  end
end
|
149
|
+
|
150
|
+
# Fetches every entry of @files into vendor/ and unpacks archives.
#
# @files is not set in this file; it is presumably assigned by the plugin's
# Rakefile (confirm). Each entry is a Hash read through the keys 'url', 'sha1'
# and, optionally, 'files'. A nil @files is treated as "nothing to vendor".
#
# * "*.tar.gz" downloads are untarred. With a 'files' list, only entries whose
#   name (with the archive prefix removed) is in the list are extracted, each
#   flattened to vendor/<basename>. Without a list every entry is extracted to
#   vendor/<entry name>.
# * other "*.gz" downloads are gunzipped in place.
# * anything else is left as downloaded.
desc "Process any vendor files required for this plugin"
task "vendor" do

  (@files || []).each do |file|
    # Named archive rather than download so the download method is not shadowed.
    archive = file_fetch(file['url'], file['sha1'])

    # Anchored, escaped patterns: /.tar.gz/ also matched names that merely
    # contain something like "xtarzgz" anywhere.
    if archive =~ /\.tar\.gz\z/
      prefix = File.basename(archive, '.tar.gz')
      wanted = file['files']
      untar(archive) do |entry|
        if wanted.nil?
          # No filter given: keep the entry's own path. Without this branch
          # the destination was File.join('vendor', nil), a TypeError.
          File.join('vendor', entry.full_name)
        else
          # `next` returns nil from the block, which makes untar skip the entry.
          next unless wanted.include?(entry.full_name.gsub(prefix, ''))
          File.join('vendor', entry.full_name.split("/").last)
        end
      end
    elsif archive =~ /\.gz\z/
      ungz(archive)
    end
  end

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
require "logstash/filters/split"
|
4
|
+
|
5
|
+
# Specs for the split filter: default settings, a custom terminator and a
# custom field. `config`, `sample` and `insist` are not defined in this file;
# they presumably come from spec_helper (confirm).
describe LogStash::Filters::Split do

  describe "all defaults" do
    config <<-CONFIG
      filter {
        split { }
      }
    CONFIG

    sample "big\nbird\nsesame street" do
      insist { subject.length } == 3
      insist { subject[0]["message"] } == "big"
      insist { subject[1]["message"] } == "bird"
      insist { subject[2]["message"] } == "sesame street"
    end
  end

  describe "custom terminator" do
    config <<-CONFIG
      filter {
        split {
          terminator => "\t"
        }
      }
    CONFIG

    sample "big\tbird\tsesame street" do
      insist { subject.length } == 3
      insist { subject[0]["message"] } == "big"
      insist { subject[1]["message"] } == "bird"
      insist { subject[2]["message"] } == "sesame street"
    end
  end

  describe "custom field" do
    config <<-CONFIG
      filter {
        split {
          field => "custom"
        }
      }
    CONFIG

    sample("custom" => "big\nbird\nsesame street", "do_not_touch" => "1\n2\n3") do
      insist { subject.length } == 3
      # Fields other than the split field must be copied unchanged.
      subject.each do |s|
         insist { s["do_not_touch"] } == "1\n2\n3"
      end
      insist { subject[0]["custom"] } == "big"
      insist { subject[1]["custom"] } == "bird"
      insist { subject[2]["custom"] } == "sesame street"
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: logstash-filter-split
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Elasticsearch
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-02 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: logstash
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ! '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.4.0
|
20
|
+
- - <
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.0.0
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.0
|
30
|
+
- - <
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.0.0
|
33
|
+
description: The split filter is for splitting multiline messages into separate events.
|
34
|
+
email: richard.pijnenburg@elasticsearch.com
|
35
|
+
executables: []
|
36
|
+
extensions: []
|
37
|
+
extra_rdoc_files: []
|
38
|
+
files:
|
39
|
+
- .gitignore
|
40
|
+
- Gemfile
|
41
|
+
- Rakefile
|
42
|
+
- lib/logstash/filters/split.rb
|
43
|
+
- logstash-filter-split.gemspec
|
44
|
+
- rakelib/publish.rake
|
45
|
+
- rakelib/vendor.rake
|
46
|
+
- spec/filters/split_spec.rb
|
47
|
+
homepage: http://logstash.net/
|
48
|
+
licenses:
|
49
|
+
- Apache License (2.0)
|
50
|
+
metadata:
|
51
|
+
logstash_plugin: 'true'
|
52
|
+
group: filter
|
53
|
+
post_install_message:
|
54
|
+
rdoc_options: []
|
55
|
+
require_paths:
|
56
|
+
- lib
|
57
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
requirements: []
|
68
|
+
rubyforge_project:
|
69
|
+
rubygems_version: 2.4.1
|
70
|
+
signing_key:
|
71
|
+
specification_version: 4
|
72
|
+
summary: The split filter is for splitting multiline messages into separate events.
|
73
|
+
test_files:
|
74
|
+
- spec/filters/split_spec.rb
|