logstash-output-csv 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjYxNjRjYWVjNGZkMDg2YzdhMGZhNzQ4YzExMjA3OTEwNjVjZTlhNA==
5
+ data.tar.gz: !binary |-
6
+ ZjNmNjY0YjAyYjdmNTkwZjEyNjRiMWJjZTdiYzY1NTNkODE5NzUxMw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YjM1MTgzMTQwZDRhNTk5YTU2YWM3NjQ5ZGI1YWVmMjU4Y2I0NjhkMmQwZjM5
10
+ Mjc1YWQxMTRkMTdhNWEzNjNlNzc1YmJlZjc4YWI4NWU2ZjM2OTRmMWEyMDhi
11
+ NWNiNzg2MjA0MDBkYzVhM2FlMWQ5OWJhMzU1YTQ0ZTczNjQ2NDU=
12
+ data.tar.gz: !binary |-
13
+ N2E1MjFhZTljOGRlOWZjYmQzYjQwZTU1OGFlMGE5NWYwZjgyZDdkNTUxYWVi
14
+ ZjM2MjA4NGIwMDVkMGFkYTcwZWY3MjdhMWY0Nzg4OTFkZWQ3NmU3NTIxOWE4
15
+ OTQ4OTgzYWM1YjM5NDZkZDJjZWYxZWU5NjIxN2I1Mjg3YjY5OWU=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://rubygems.org'
2
+ gem 'rake'
3
+ gem 'gem_publisher'
4
+ gem 'archive-tar-minitar'
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
+ @files=[]
2
+
3
+ task :default do
4
+ system("rake -T")
5
+ end
6
+
@@ -0,0 +1,51 @@
1
+ require "csv"
2
+ require "logstash/namespace"
3
+ require "logstash/outputs/file"
4
+ require "logstash/json"
5
+
6
+ # CSV output.
7
+ #
8
+ # Write events to disk in CSV or other delimited format
9
+ # Based on the file output, many config values are shared
10
+ # Uses the Ruby csv library internally
11
+ class LogStash::Outputs::CSV < LogStash::Outputs::File
12
+
13
+ config_name "csv"
14
+ milestone 1
15
+
16
+ # The field names from the event that should be written to the CSV file.
17
+ # Fields are written to the CSV in the same order as the array.
18
+ # If a field does not exist on the event, an empty string will be written.
19
+ # Supports field reference syntax eg: `fields => ["field1", "[nested][field]"]`.
20
+ config :fields, :validate => :array, :required => true
21
+
22
+ # Options for CSV output. This is passed directly to the Ruby stdlib to_csv function.
23
+ # Full documentation is available on the http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/index.html[Ruby CSV documentation page].
24
+ # A typical use case would be to use alternative column or row separators eg: `csv_options => {"col_sep" => "\t" "row_sep" => "\r\n"}` gives tab separated data with Windows line endings
25
+ config :csv_options, :validate => :hash, :required => false, :default => Hash.new
26
+
27
+ public
28
+ def register
29
+ super
30
+ @csv_options = Hash[@csv_options.map{|(k, v)|[k.to_sym, v]}]
31
+ end
32
+
33
+ public
34
+ def receive(event)
35
+ return unless output?(event)
36
+ path = event.sprintf(@path)
37
+ fd = open(path)
38
+ csv_values = @fields.map {|name| get_value(name, event)}
39
+ fd.write(csv_values.to_csv(@csv_options))
40
+
41
+ flush(fd)
42
+ close_stale_files
43
+ end #def receive
44
+
45
+ private
46
+ def get_value(name, event)
47
+ val = event[name]
48
+ val.is_a?(Hash) ? LogStash::Json.dump(val) : val
49
+ end
50
+ end # class LogStash::Outputs::CSV
51
+
@@ -0,0 +1,28 @@
1
+ Gem::Specification.new do |s|
2
+
3
+ s.name = 'logstash-output-csv'
4
+ s.version = '0.1.0'
5
+ s.licenses = ['Apache License (2.0)']
6
+ s.summary = "Write events to disk in CSV or other delimited format"
7
+ s.description = "Write events to disk in CSV or other delimited format"
8
+ s.authors = ["Elasticsearch"]
9
+ s.email = 'richard.pijnenburg@elasticsearch.com'
10
+ s.homepage = "http://logstash.net/"
11
+ s.require_paths = ["lib"]
12
+
13
+ # Files
14
+ s.files = `git ls-files`.split($\)+::Dir.glob('vendor/*')
15
+
16
+ # Tests
17
+ s.test_files = s.files.grep(%r{^(test|spec|features)/})
18
+
19
+ # Special flag to let us know this is actually a logstash plugin
20
+ s.metadata = { "logstash_plugin" => "true", "group" => "output" }
21
+
22
+ # Gem dependencies
23
+ s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
24
+
25
+ s.add_runtime_dependency 'logstash-output-file'
26
+ s.add_runtime_dependency 'logstash-filter-json'
27
+ end
28
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
+ desc "Publish gem to RubyGems.org"
4
+ task :publish_gem do |t|
5
+ gem_file = Dir.glob(File.expand_path('../*.gemspec',File.dirname(__FILE__))).first
6
+ gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
7
+ puts "Published #{gem}" if gem
8
+ end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
+ def vendor(*args)
6
+ return File.join("vendor", *args)
7
+ end
8
+
9
+ directory "vendor/" => ["vendor"] do |task, args|
10
+ mkdir task.name
11
+ end
12
+
13
+ def fetch(url, sha1, output)
14
+
15
+ puts "Downloading #{url}"
16
+ actual_sha1 = download(url, output)
17
+
18
+ if actual_sha1 != sha1
19
+ fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
20
+ end
21
+ end # def fetch
22
+
23
+ def file_fetch(url, sha1)
24
+ filename = File.basename( URI(url).path )
25
+ output = "vendor/#{filename}"
26
+ task output => [ "vendor/" ] do
27
+ begin
28
+ actual_sha1 = file_sha1(output)
29
+ if actual_sha1 != sha1
30
+ fetch(url, sha1, output)
31
+ end
32
+ rescue Errno::ENOENT
33
+ fetch(url, sha1, output)
34
+ end
35
+ end.invoke
36
+
37
+ return output
38
+ end
39
+
40
+ def file_sha1(path)
41
+ digest = Digest::SHA1.new
42
+ fd = File.new(path, "r")
43
+ while true
44
+ begin
45
+ digest << fd.sysread(16384)
46
+ rescue EOFError
47
+ break
48
+ end
49
+ end
50
+ return digest.hexdigest
51
+ ensure
52
+ fd.close if fd
53
+ end
54
+
55
+ def download(url, output)
56
+ uri = URI(url)
57
+ digest = Digest::SHA1.new
58
+ tmp = "#{output}.tmp"
59
+ Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
60
+ request = Net::HTTP::Get.new(uri.path)
61
+ http.request(request) do |response|
62
+ fail "HTTP fetch failed for #{url}. #{response}" if [200, 301].include?(response.code)
63
+ size = (response["content-length"].to_i || -1).to_f
64
+ count = 0
65
+ File.open(tmp, "w") do |fd|
66
+ response.read_body do |chunk|
67
+ fd.write(chunk)
68
+ digest << chunk
69
+ if size > 0 && $stdout.tty?
70
+ count += chunk.bytesize
71
+ $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
72
+ end
73
+ end
74
+ end
75
+ $stdout.write("\r \r") if $stdout.tty?
76
+ end
77
+ end
78
+
79
+ File.rename(tmp, output)
80
+
81
+ return digest.hexdigest
82
+ rescue SocketError => e
83
+ puts "Failure while downloading #{url}: #{e}"
84
+ raise
85
+ ensure
86
+ File.unlink(tmp) if File.exist?(tmp)
87
+ end # def download
88
+
89
+ def untar(tarball, &block)
90
+ require "archive/tar/minitar"
91
+ tgz = Zlib::GzipReader.new(File.open(tarball))
92
+ # Pull out typesdb
93
+ tar = Archive::Tar::Minitar::Input.open(tgz)
94
+ tar.each do |entry|
95
+ path = block.call(entry)
96
+ next if path.nil?
97
+ parent = File.dirname(path)
98
+
99
+ mkdir_p parent unless File.directory?(parent)
100
+
101
+ # Skip this file if the output file is the same size
102
+ if entry.directory?
103
+ mkdir path unless File.directory?(path)
104
+ else
105
+ entry_mode = entry.instance_eval { @mode } & 0777
106
+ if File.exists?(path)
107
+ stat = File.stat(path)
108
+ # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
109
+ # expose headers in the entry.
110
+ entry_size = entry.instance_eval { @size }
111
+ # If file sizes are same, skip writing.
112
+ next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
113
+ end
114
+ puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
115
+ File.open(path, "w") do |fd|
116
+ # eof? check lets us skip empty files. Necessary because the API provided by
117
+ # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
118
+ # IO object. Something about empty files in this EntryStream causes
119
+ # IO.copy_stream to throw "can't convert nil into String" on JRuby
120
+ # TODO(sissel): File a bug about this.
121
+ while !entry.eof?
122
+ chunk = entry.read(16384)
123
+ fd.write(chunk)
124
+ end
125
+ #IO.copy_stream(entry, fd)
126
+ end
127
+ File.chmod(entry_mode, path)
128
+ end
129
+ end
130
+ tar.close
131
+ File.unlink(tarball) if File.file?(tarball)
132
+ end # def untar
133
+
134
+ def ungz(file)
135
+
136
+ outpath = file.gsub('.gz', '')
137
+ tgz = Zlib::GzipReader.new(File.open(file))
138
+ begin
139
+ File.open(outpath, "w") do |out|
140
+ IO::copy_stream(tgz, out)
141
+ end
142
+ File.unlink(file)
143
+ rescue
144
+ File.unlink(outpath) if File.file?(outpath)
145
+ raise
146
+ end
147
+ tgz.close
148
+ end
149
+
150
+ desc "Process any vendor files required for this plugin"
151
+ task "vendor" do |task, args|
152
+
153
+ @files.each do |file|
154
+ download = file_fetch(file['url'], file['sha1'])
155
+ if download =~ /.tar.gz/
156
+ prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
157
+ untar(download) do |entry|
158
+ if !file['files'].nil?
159
+ next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
160
+ out = entry.full_name.split("/").last
161
+ end
162
+ File.join('vendor', out)
163
+ end
164
+ elsif download =~ /.gz/
165
+ ungz(download)
166
+ end
167
+ end
168
+
169
+ end
@@ -0,0 +1,266 @@
1
+ require "csv"
2
+ require "tempfile"
3
+ require "spec_helper"
4
+ require "logstash/outputs/csv"
5
+
6
+ describe LogStash::Outputs::CSV do
7
+
8
+
9
+ describe "Write a single field to a csv file" do
10
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
11
+ config <<-CONFIG
12
+ input {
13
+ generator {
14
+ add_field => ["foo","bar"]
15
+ count => 1
16
+ }
17
+ }
18
+ output {
19
+ csv {
20
+ path => "#{tmpfile.path}"
21
+ fields => "foo"
22
+ }
23
+ }
24
+ CONFIG
25
+
26
+ agent do
27
+ lines = File.readlines(tmpfile.path)
28
+ insist {lines.count} == 1
29
+ insist {lines[0]} == "bar\n"
30
+ end
31
+ end
32
+
33
+ describe "write multiple fields and lines to a csv file" do
34
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
35
+ config <<-CONFIG
36
+ input {
37
+ generator {
38
+ add_field => ["foo", "bar", "baz", "quux"]
39
+ count => 2
40
+ }
41
+ }
42
+ output {
43
+ csv {
44
+ path => "#{tmpfile.path}"
45
+ fields => ["foo", "baz"]
46
+ }
47
+ }
48
+ CONFIG
49
+
50
+ agent do
51
+ lines = File.readlines(tmpfile.path)
52
+ insist {lines.count} == 2
53
+ insist {lines[0]} == "bar,quux\n"
54
+ insist {lines[1]} == "bar,quux\n"
55
+ end
56
+ end
57
+
58
+ describe "missing event fields are empty in csv" do
59
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
60
+ config <<-CONFIG
61
+ input {
62
+ generator {
63
+ add_field => ["foo","bar", "baz", "quux"]
64
+ count => 1
65
+ }
66
+ }
67
+ output {
68
+ csv {
69
+ path => "#{tmpfile.path}"
70
+ fields => ["foo", "not_there", "baz"]
71
+ }
72
+ }
73
+ CONFIG
74
+
75
+ agent do
76
+ lines = File.readlines(tmpfile.path)
77
+ insist {lines.count} == 1
78
+ insist {lines[0]} == "bar,,quux\n"
79
+ end
80
+ end
81
+
82
+ describe "commas are quoted properly" do
83
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
84
+ config <<-CONFIG
85
+ input {
86
+ generator {
87
+ add_field => ["foo","one,two", "baz", "quux"]
88
+ count => 1
89
+ }
90
+ }
91
+ output {
92
+ csv {
93
+ path => "#{tmpfile.path}"
94
+ fields => ["foo", "baz"]
95
+ }
96
+ }
97
+ CONFIG
98
+
99
+ agent do
100
+ lines = File.readlines(tmpfile.path)
101
+ insist {lines.count} == 1
102
+ insist {lines[0]} == "\"one,two\",quux\n"
103
+ end
104
+ end
105
+
106
+ describe "new lines are quoted properly" do
107
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
108
+ config <<-CONFIG
109
+ input {
110
+ generator {
111
+ add_field => ["foo","one\ntwo", "baz", "quux"]
112
+ count => 1
113
+ }
114
+ }
115
+ output {
116
+ csv {
117
+ path => "#{tmpfile.path}"
118
+ fields => ["foo", "baz"]
119
+ }
120
+ }
121
+ CONFIG
122
+
123
+ agent do
124
+ lines = CSV.read(tmpfile.path)
125
+ insist {lines.count} == 1
126
+ insist {lines[0][0]} == "one\ntwo"
127
+ end
128
+ end
129
+
130
+ describe "fields that are are objects are written as JSON" do
131
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
132
+ config <<-CONFIG
133
+ input {
134
+ generator {
135
+ message => '{"foo":{"one":"two"},"baz": "quux"}'
136
+ count => 1
137
+ }
138
+ }
139
+ filter {
140
+ json { source => "message"}
141
+ }
142
+ output {
143
+ csv {
144
+ path => "#{tmpfile.path}"
145
+ fields => ["foo", "baz"]
146
+ }
147
+ }
148
+ CONFIG
149
+
150
+ agent do
151
+ lines = CSV.read(tmpfile.path)
152
+ insist {lines.count} == 1
153
+ insist {lines[0][0]} == '{"one":"two"}'
154
+ end
155
+ end
156
+
157
+ describe "can address nested field using field reference syntax" do
158
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
159
+ config <<-CONFIG
160
+ input {
161
+ generator {
162
+ message => '{"foo":{"one":"two"},"baz": "quux"}'
163
+ count => 1
164
+ }
165
+ }
166
+ filter {
167
+ json { source => "message"}
168
+ }
169
+ output {
170
+ csv {
171
+ path => "#{tmpfile.path}"
172
+ fields => ["[foo][one]", "baz"]
173
+ }
174
+ }
175
+ CONFIG
176
+
177
+ agent do
178
+ lines = CSV.read(tmpfile.path)
179
+ insist {lines.count} == 1
180
+ insist {lines[0][0]} == "two"
181
+ insist {lines[0][1]} == "quux"
182
+ end
183
+ end
184
+
185
+ describe "missing nested field is blank" do
186
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
187
+ config <<-CONFIG
188
+ input {
189
+ generator {
190
+ message => '{"foo":{"one":"two"},"baz": "quux"}'
191
+ count => 1
192
+ }
193
+ }
194
+ filter {
195
+ json { source => "message"}
196
+ }
197
+ output {
198
+ csv {
199
+ path => "#{tmpfile.path}"
200
+ fields => ["[foo][missing]", "baz"]
201
+ }
202
+ }
203
+ CONFIG
204
+
205
+ agent do
206
+ lines = File.readlines(tmpfile.path)
207
+ insist {lines.count} == 1
208
+ insist {lines[0]} == ",quux\n"
209
+ end
210
+ end
211
+
212
+ describe "can choose field seperator" do
213
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
214
+ config <<-CONFIG
215
+ input {
216
+ generator {
217
+ message => '{"foo":"one","bar": "two"}'
218
+ count => 1
219
+ }
220
+ }
221
+ filter {
222
+ json { source => "message"}
223
+ }
224
+ output {
225
+ csv {
226
+ path => "#{tmpfile.path}"
227
+ fields => ["foo", "bar"]
228
+ csv_options => {"col_sep" => "|"}
229
+ }
230
+ }
231
+ CONFIG
232
+
233
+ agent do
234
+ lines = File.readlines(tmpfile.path)
235
+ insist {lines.count} == 1
236
+ insist {lines[0]} == "one|two\n"
237
+ end
238
+ end
239
+ describe "can choose line seperator" do
240
+ tmpfile = Tempfile.new('logstash-spec-output-csv')
241
+ config <<-CONFIG
242
+ input {
243
+ generator {
244
+ message => '{"foo":"one","bar": "two"}'
245
+ count => 2
246
+ }
247
+ }
248
+ filter {
249
+ json { source => "message"}
250
+ }
251
+ output {
252
+ csv {
253
+ path => "#{tmpfile.path}"
254
+ fields => ["foo", "bar"]
255
+ csv_options => {"col_sep" => "|" "row_sep" => "\t"}
256
+ }
257
+ }
258
+ CONFIG
259
+
260
+ agent do
261
+ lines = File.readlines(tmpfile.path)
262
+ insist {lines.count} == 1
263
+ insist {lines[0]} == "one|two\tone|two\t"
264
+ end
265
+ end
266
+ end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-output-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: logstash-output-file
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: logstash-filter-json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ description: Write events to disk in CSV or other delimited format
62
+ email: richard.pijnenburg@elasticsearch.com
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - .gitignore
68
+ - Gemfile
69
+ - LICENSE
70
+ - Rakefile
71
+ - lib/logstash/outputs/csv.rb
72
+ - logstash-output-csv.gemspec
73
+ - rakelib/publish.rake
74
+ - rakelib/vendor.rake
75
+ - spec/outputs/csv_spec.rb
76
+ homepage: http://logstash.net/
77
+ licenses:
78
+ - Apache License (2.0)
79
+ metadata:
80
+ logstash_plugin: 'true'
81
+ group: output
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.4.1
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Write events to disk in CSV or other delimited format
102
+ test_files:
103
+ - spec/outputs/csv_spec.rb