logstash-output-csv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MjYxNjRjYWVjNGZkMDg2YzdhMGZhNzQ4YzExMjA3OTEwNjVjZTlhNA==
5
+ data.tar.gz: !binary |-
6
+ ZjNmNjY0YjAyYjdmNTkwZjEyNjRiMWJjZTdiYzY1NTNkODE5NzUxMw==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YjM1MTgzMTQwZDRhNTk5YTU2YWM3NjQ5ZGI1YWVmMjU4Y2I0NjhkMmQwZjM5
10
+ Mjc1YWQxMTRkMTdhNWEzNjNlNzc1YmJlZjc4YWI4NWU2ZjM2OTRmMWEyMDhi
11
+ NWNiNzg2MjA0MDBkYzVhM2FlMWQ5OWJhMzU1YTQ0ZTczNjQ2NDU=
12
+ data.tar.gz: !binary |-
13
+ N2E1MjFhZTljOGRlOWZjYmQzYjQwZTU1OGFlMGE5NWYwZjgyZDdkNTUxYWVi
14
+ ZjM2MjA4NGIwMDVkMGFkYTcwZWY3MjdhMWY0Nzg4OTFkZWQ3NmU3NTIxOWE4
15
+ OTQ4OTgzYWM1YjM5NDZkZDJjZWYxZWU5NjIxN2I1Mjg3YjY5OWU=
data/.gitignore ADDED
@@ -0,0 +1,4 @@
1
+ *.gem
2
+ Gemfile.lock
3
+ .bundle
4
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
# Resolve gems over HTTPS so packages cannot be tampered with in transit.
source 'https://rubygems.org'

# Tooling needed by the Rakefile and the tasks under rakelib/.
gem 'rake'
gem 'gem_publisher'        # required by rakelib/publish.rake
gem 'archive-tar-minitar'  # required lazily by untar in rakelib/vendor.rake
data/LICENSE ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012-2014 Elasticsearch <http://www.elasticsearch.org>
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/Rakefile ADDED
@@ -0,0 +1,6 @@
1
# Vendor file descriptors iterated by the "vendor" task; this plugin has none.
@files = []

# A bare `rake` simply lists the available tasks.
task(:default) { system("rake -T") }
6
+
@@ -0,0 +1,51 @@
1
+ require "csv"
2
+ require "logstash/namespace"
3
+ require "logstash/outputs/file"
4
+ require "logstash/json"
5
+
6
# CSV output.
#
# Writes events to disk as CSV or another delimited format.
# Built on top of the file output, so many config values are shared with it.
# Rows are serialised with the Ruby csv library.
class LogStash::Outputs::CSV < LogStash::Outputs::File

  config_name "csv"
  milestone 1

  # The field names from the event that should be written to the CSV file.
  # Fields are written to the CSV in the same order as the array.
  # If a field does not exist on the event, an empty string will be written.
  # Supports field reference syntax eg: `fields => ["field1", "[nested][field]"]`.
  config :fields, :validate => :array, :required => true

  # Options for CSV output. This is passed directly to the Ruby stdlib to_csv function.
  # Full documentation is available on the http://ruby-doc.org/stdlib-2.0.0/libdoc/csv/rdoc/index.html[Ruby CSV documentation page].
  # A typical use case would be to use alternative column or row separators eg: `csv_options => {"col_sep" => "\t" "row_sep" => "\r\n"}` gives tab separated data with Windows line endings
  config :csv_options, :validate => :hash, :required => false, :default => Hash.new

  # Runs the parent's registration, then rebuilds @csv_options with every key
  # converted to a symbol before it is later handed to to_csv.
  public
  def register
    super
    @csv_options = @csv_options.each_with_object({}) do |(option, value), symbolized|
      symbolized[option.to_sym] = value
    end
  end

  # Appends one CSV row for the event to the file named by the (sprintf-expanded)
  # path setting. Events rejected by output? are ignored.
  public
  def receive(event)
    return unless output?(event)

    fd = open(event.sprintf(@path))
    row = @fields.map { |field| get_value(field, event) }
    fd.write(row.to_csv(@csv_options))

    flush(fd)
    close_stale_files
  end #def receive

  # Looks up +name+ on the event. Hash values are dumped as JSON so they fit in
  # a single cell; every other value is returned untouched.
  private
  def get_value(name, event)
    value = event[name]
    return LogStash::Json.dump(value) if value.is_a?(Hash)

    value
  end
end # class LogStash::Outputs::CSV
51
+
@@ -0,0 +1,28 @@
1
# Gem specification for the logstash-output-csv plugin.
Gem::Specification.new do |s|

  s.name            = 'logstash-output-csv'
  s.version         = '0.1.0'
  s.licenses        = ['Apache License (2.0)']
  s.summary         = "Write events to disk in CSV or other delimited format"
  s.description     = "Write events to disk in CSV or other delimited format"
  s.authors         = ["Elasticsearch"]
  s.email           = 'richard.pijnenburg@elasticsearch.com'
  s.homepage        = "http://logstash.net/"
  s.require_paths   = ["lib"]

  # Files
  # NUL-delimited listing: the previous split($\) used the output record
  # separator, which is nil by default, so it split on any whitespace and
  # mangled tracked paths that contain spaces.
  s.files = `git ls-files -z`.split("\x0") + ::Dir.glob('vendor/*')

  # Tests
  s.test_files = s.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  s.metadata = { "logstash_plugin" => "true", "group" => "output" }

  # Gem dependencies
  s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'

  s.add_runtime_dependency 'logstash-output-file'
  s.add_runtime_dependency 'logstash-filter-json'
end
28
+
@@ -0,0 +1,9 @@
1
+ require "gem_publisher"
2
+
3
desc "Publish gem to RubyGems.org"
task :publish_gem do |_task|
  # The gemspec lives one directory above this rake file; take the first match.
  gemspec_pattern = File.expand_path('../*.gemspec', File.dirname(__FILE__))
  gemspec_path = Dir.glob(gemspec_pattern).first

  # Report only when publish_if_updated hands back a truthy result.
  published = GemPublisher.publish_if_updated(gemspec_path, :rubygems)
  puts "Published #{published}" if published
end
9
+
@@ -0,0 +1,169 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "digest/sha1"
4
+
5
# Builds a path below the "vendor" directory from the given path segments.
#
# segments - zero or more String path components.
#
# Returns the joined String path ("vendor" itself when no segments are given).
def vendor(*args)
  File.join("vendor", *args)
end
8
+
9
# Directory task for "vendor/" (declared with "vendor" as its prerequisite);
# the action creates the directory named by the task.
directory("vendor/" => ["vendor"]) do |dir_task, _args|
  mkdir dir_task.name
end
12
+
13
# Downloads +url+ to +output+ and verifies the checksum of what was received.
#
# url    - String URL to download.
# sha1   - String expected SHA1 hex digest.
# output - String destination path.
#
# Returns nil when the digest matches.
# Raises RuntimeError when the downloaded content has a different SHA1.
def fetch(url, sha1, output)
  puts "Downloading #{url}"
  actual_sha1 = download(url, output)
  return if actual_sha1 == sha1

  fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
end # def fetch
22
+
23
# Ensures the file behind +url+ exists under vendor/ with the expected checksum.
#
# A rake task named after the target path is defined (depending on "vendor/")
# and invoked immediately. Its action downloads the file when the local copy is
# missing or its SHA1 differs from +sha1+.
#
# url  - String URL; the basename of its path becomes the local file name.
# sha1 - String expected SHA1 hex digest.
#
# Returns the String path of the vendored file.
def file_fetch(url, sha1)
  basename = File.basename(URI(url).path)
  output = "vendor/#{basename}"

  fetch_task = task(output => ["vendor/"]) do
    begin
      fetch(url, sha1, output) unless file_sha1(output) == sha1
    rescue Errno::ENOENT
      # No local copy yet.
      fetch(url, sha1, output)
    end
  end
  fetch_task.invoke

  output
end
39
+
40
# Computes the SHA1 hex digest of the file at +path+.
#
# Uses the standard library's file digesting instead of a hand-written
# sysread loop that relied on EOFError for termination.
#
# path - String path of the file to hash.
#
# Returns the digest as a lowercase hex String.
# Raises Errno::ENOENT when the file does not exist (file_fetch relies on this).
def file_sha1(path)
  Digest::SHA1.file(path).hexdigest
end
54
+
55
# Streams +url+ to +output+ and returns the SHA1 hex digest of the bytes received.
#
# The body is written to "<output>.tmp" and renamed over +output+ only once the
# transfer has finished; a leftover temp file is removed on the way out.
# Redirects are not followed.
#
# url    - String HTTP or HTTPS URL.
# output - String destination path.
#
# Returns the SHA1 hex digest String of the downloaded body.
# Raises RuntimeError when the response is not a 2xx success, and re-raises
# SocketError after printing a message.
def download(url, output)
  # Assigned first so the ensure clause never sees nil if URI parsing raises.
  tmp = "#{output}.tmp"
  uri = URI(url)
  digest = Digest::SHA1.new

  Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
    # request_uri keeps the query string (uri.path would silently drop it).
    request = Net::HTTP::Get.new(uri.request_uri)
    http.request(request) do |response|
      # response.code is a String, so the old Integer comparison never matched
      # (and was inverted); check the response class instead.
      fail "HTTP fetch failed for #{url}. #{response}" unless response.is_a?(Net::HTTPSuccess)

      # 0.0 when the header is absent, which disables the progress display.
      size = response["content-length"].to_i.to_f
      count = 0
      # Binary mode so the payload is written byte-for-byte on every platform.
      File.open(tmp, "wb") do |fd|
        response.read_body do |chunk|
          fd.write(chunk)
          digest << chunk
          if size > 0 && $stdout.tty?
            count += chunk.bytesize
            $stdout.write(sprintf("\r%0.2f%%", count/size * 100))
          end
        end
      end
      $stdout.write("\r      \r") if $stdout.tty?
    end
  end

  File.rename(tmp, output)

  return digest.hexdigest
rescue SocketError => e
  puts "Failure while downloading #{url}: #{e}"
  raise
ensure
  File.unlink(tmp) if File.exist?(tmp)
end # def download
88
+
89
# Extracts entries from a gzipped tarball and deletes the tarball afterwards.
#
# Each entry is passed to the block, which returns the destination path for it
# or nil to skip the entry. Existing files whose size and permission bits
# already match the entry are left alone.
#
# tarball - String path to a .tar.gz file.
# block   - called with each archive entry; returns a String path or nil.
#
# The archive handles are closed even when extraction fails; the tarball is
# only removed after a successful run.
def untar(tarball, &block)
  # Zlib is used below but was never required explicitly.
  require "zlib"
  require "archive/tar/minitar"

  tgz = Zlib::GzipReader.new(File.open(tarball, "rb"))
  begin
    tar = Archive::Tar::Minitar::Input.open(tgz)
    tar.each do |entry|
      path = block.call(entry)
      next if path.nil?

      parent = File.dirname(path)
      mkdir_p parent unless File.directory?(parent)

      if entry.directory?
        mkdir path unless File.directory?(path)
        next
      end

      entry_mode = entry.instance_eval { @mode } & 0777
      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
      if File.exist?(path)
        stat = File.stat(path)
        # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
        # expose headers in the entry.
        entry_size = entry.instance_eval { @size }
        # If file size and mode are the same, skip writing.
        next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
      end

      puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
      # Binary mode so archive contents are written byte-for-byte.
      File.open(path, "wb") do |fd|
        # eof? check lets us skip empty files. Necessary because the API provided by
        # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
        # IO object. Something about empty files in this EntryStream causes
        # IO.copy_stream to throw "can't convert nil into String" on JRuby
        # TODO(sissel): File a bug about this.
        fd.write(entry.read(16384)) until entry.eof?
      end
      File.chmod(entry_mode, path)
    end
  ensure
    # Release the archive and the underlying file handle on every exit path.
    tar.close if tar
    tgz.close unless tgz.closed?
  end

  File.unlink(tarball) if File.file?(tarball)
end # def untar
133
+
134
# Decompresses a gzip file next to itself and removes the compressed original.
#
# Only the trailing ".gz" extension is stripped to form the output name, so a
# ".gz" elsewhere in the path is left intact. If decompression fails, the
# partially written output is deleted and the error is re-raised; the source
# file is kept in that case.
#
# file - String path ending in ".gz".
#
# Raises ArgumentError when +file+ does not end in ".gz" (writing the output
# would otherwise truncate the input itself).
def ungz(file)
  # Zlib is used below but was never required explicitly.
  require "zlib"

  outpath = file.sub(/\.gz\z/, '')
  raise ArgumentError, "#{file} does not end in .gz" if outpath == file

  # The block form closes the reader on success and on failure alike.
  Zlib::GzipReader.open(file) do |gz|
    begin
      File.open(outpath, "wb") do |out|
        IO.copy_stream(gz, out)
      end
    rescue
      File.unlink(outpath) if File.file?(outpath)
      raise
    end
  end

  File.unlink(file)
end
149
+
150
desc "Process any vendor files required for this plugin"
# Fetches every entry of @files (hashes with 'url', 'sha1' and an optional
# 'files' list) into vendor/ and unpacks .tar.gz / .gz downloads.
task "vendor" do |_task, _args|

  @files.each do |file|
    # Named `archive` so the local does not shadow the download method.
    archive = file_fetch(file['url'], file['sha1'])

    # Match on the real extension; the old /.tar.gz/ and /.gz/ patterns had
    # unescaped dots and matched anywhere in the name.
    if archive.end_with?('.tar.gz')
      prefix = archive.sub(/\.tar\.gz\z/, '').sub(/\Avendor\//, '')
      untar(archive) do |entry|
        if file['files'].nil?
          # No filter given: previously `out` stayed nil here and File.join
          # raised TypeError. Extract the entry under vendor/ keeping its
          # archive-relative path.
          # NOTE(review): assumed intent is "extract everything" - confirm.
          out = entry.full_name
        else
          next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
          out = entry.full_name.split("/").last
        end
        File.join('vendor', out)
      end
    elsif archive.end_with?('.gz')
      ungz(archive)
    end
  end

end
@@ -0,0 +1,266 @@
1
+ require "csv"
2
+ require "tempfile"
3
+ require "spec_helper"
4
+ require "logstash/outputs/csv"
5
+
6
# End-to-end specs: each example runs a small pipeline that writes to a
# temporary file through the csv output, then inspects what landed on disk.
describe LogStash::Outputs::CSV do

  # One configured field yields a single-column row.
  describe "Write a single field to a csv file" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          add_field => ["foo","bar"]
          count => 1
        }
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => "foo"
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == "bar\n"
    end
  end

  # Two events produce two rows, columns in the configured order.
  describe "write multiple fields and lines to a csv file" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          add_field => ["foo", "bar", "baz", "quux"]
          count => 2
        }
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "baz"]
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 2
      insist { written[0] } == "bar,quux\n"
      insist { written[1] } == "bar,quux\n"
    end
  end

  # A field absent from the event leaves an empty cell.
  describe "missing event fields are empty in csv" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          add_field => ["foo","bar", "baz", "quux"]
          count => 1
        }
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "not_there", "baz"]
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == "bar,,quux\n"
    end
  end

  # Values containing the column separator are quoted.
  describe "commas are quoted properly" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          add_field => ["foo","one,two", "baz", "quux"]
          count => 1
        }
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "baz"]
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == "\"one,two\",quux\n"
    end
  end

  # Embedded newlines survive a round trip through the CSV parser.
  describe "new lines are quoted properly" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          add_field => ["foo","one\ntwo", "baz", "quux"]
          count => 1
        }
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "baz"]
        }
      }
    CONFIG

    agent do
      rows = CSV.read(csv_file.path)
      insist { rows.count } == 1
      insist { rows[0][0] } == "one\ntwo"
    end
  end

  # Hash-valued fields are serialised as JSON inside one cell.
  describe "fields that are are objects are written as JSON" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          message => '{"foo":{"one":"two"},"baz": "quux"}'
          count => 1
        }
      }
      filter {
        json { source => "message"}
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "baz"]
        }
      }
    CONFIG

    agent do
      rows = CSV.read(csv_file.path)
      insist { rows.count } == 1
      insist { rows[0][0] } == '{"one":"two"}'
    end
  end

  # "[outer][inner]" references reach into nested event data.
  describe "can address nested field using field reference syntax" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          message => '{"foo":{"one":"two"},"baz": "quux"}'
          count => 1
        }
      }
      filter {
        json { source => "message"}
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["[foo][one]", "baz"]
        }
      }
    CONFIG

    agent do
      rows = CSV.read(csv_file.path)
      insist { rows.count } == 1
      insist { rows[0][0] } == "two"
      insist { rows[0][1] } == "quux"
    end
  end

  # A nested reference that resolves to nothing leaves an empty cell.
  describe "missing nested field is blank" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          message => '{"foo":{"one":"two"},"baz": "quux"}'
          count => 1
        }
      }
      filter {
        json { source => "message"}
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["[foo][missing]", "baz"]
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == ",quux\n"
    end
  end

  # csv_options can override the column separator.
  describe "can choose field seperator" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          message => '{"foo":"one","bar": "two"}'
          count => 1
        }
      }
      filter {
        json { source => "message"}
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "bar"]
          csv_options => {"col_sep" => "|"}
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == "one|two\n"
    end
  end

  # With a tab as row separator both events end up on one physical line.
  describe "can choose line seperator" do
    csv_file = Tempfile.new('logstash-spec-output-csv')
    config <<-CONFIG
      input {
        generator {
          message => '{"foo":"one","bar": "two"}'
          count => 2
        }
      }
      filter {
        json { source => "message"}
      }
      output {
        csv {
          path => "#{csv_file.path}"
          fields => ["foo", "bar"]
          csv_options => {"col_sep" => "|" "row_sep" => "\t"}
        }
      }
    CONFIG

    agent do
      written = File.readlines(csv_file.path)
      insist { written.count } == 1
      insist { written[0] } == "one|two\tone|two\t"
    end
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: logstash-output-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Elasticsearch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-11-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: logstash
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: 1.4.0
20
+ - - <
21
+ - !ruby/object:Gem::Version
22
+ version: 2.0.0
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: 1.4.0
30
+ - - <
31
+ - !ruby/object:Gem::Version
32
+ version: 2.0.0
33
+ - !ruby/object:Gem::Dependency
34
+ name: logstash-output-file
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ! '>='
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ! '>='
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: logstash-filter-json
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ! '>='
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ description: Write events to disk in CSV or other delimited format
62
+ email: richard.pijnenburg@elasticsearch.com
63
+ executables: []
64
+ extensions: []
65
+ extra_rdoc_files: []
66
+ files:
67
+ - .gitignore
68
+ - Gemfile
69
+ - LICENSE
70
+ - Rakefile
71
+ - lib/logstash/outputs/csv.rb
72
+ - logstash-output-csv.gemspec
73
+ - rakelib/publish.rake
74
+ - rakelib/vendor.rake
75
+ - spec/outputs/csv_spec.rb
76
+ homepage: http://logstash.net/
77
+ licenses:
78
+ - Apache License (2.0)
79
+ metadata:
80
+ logstash_plugin: 'true'
81
+ group: output
82
+ post_install_message:
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ! '>='
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubyforge_project:
98
+ rubygems_version: 2.4.1
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Write events to disk in CSV or other delimited format
102
+ test_files:
103
+ - spec/outputs/csv_spec.rb