logstash-filter-csv 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==
+   data.tar.gz: !binary |-
+     OTgyOGNiMGU4ZjcxNjUzZGQxNGNiYmQwZDQ5YWRjODIxNzM2N2NkZA==
+ SHA512:
+   metadata.gz: !binary |-
+     YjhhNzg3YmViMjFiOWQ0OTE5NmI5MWViOTUxMDE0MWE3MTBlNWEwMmY1NjBi
+     NzU1MWNjOGQ4NWJjNDNiMDUzZTkyOWUyYTZmZmUwNjY5MzFiYzliZjc1OGNl
+     MmU2MDUwMDhlYmM4NWQ4MGRmZTdlZmEzNzMzZWRhYzZlMTA4MjE=
+   data.tar.gz: !binary |-
+     OGFlNWM4NWNkZWE1MzUxY2QxMjQ0YmM0NWQzNDVmMjhlMTBmOTgwOWVmNjkx
+     OGRkMmViN2ViZmYzODM5NThkZDBjNWFmYTU1MmY1ZjJkMzVmOTI3MmYwYmQ2
+     N2ExNGQ1ZTE2ODM5MTAzYWU3YWE5NDViZWM5NmQ2MDQ0MTExMDA=
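
Each digest above is a Base64-encoded hex string (hence the `!binary` tag), keyed by algorithm (`U0hBMQ==` decodes to `SHA1`). A minimal verification sketch, assuming the .gem archive has been unpacked so that metadata.gz sits in the current directory:

    require "digest/sha1"
    require "base64"

    # Compare the SHA1 of the unpacked metadata.gz against the
    # Base64-encoded hex digest recorded in checksums.yaml.
    expected = Base64.decode64("OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==")
    actual   = Digest::SHA1.file("metadata.gz").hexdigest
    puts(actual == expected ? "metadata.gz checksum OK" : "checksum mismatch!")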
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/filters/csv.rb ADDED
@@ -0,0 +1,97 @@
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+
+ require "csv"
+
+ # The CSV filter takes an event field containing CSV data, parses it,
+ # and stores it as individual fields (the field names can optionally be
+ # specified). This filter can also parse data with any separator, not
+ # just commas.
+ class LogStash::Filters::CSV < LogStash::Filters::Base
+   config_name "csv"
+   milestone 2
+
+   # The CSV data in the value of the `source` field will be expanded into a
+   # data structure.
+   config :source, :validate => :string, :default => "message"
+
+   # Define a list of column names (in the order they appear in the CSV,
+   # as if it were a header line). If `columns` is not configured, or there
+   # are not enough columns specified, the default column names are
+   # "column1", "column2", etc. If there are more columns in the data than
+   # in this list, the extras are auto-numbered: e.g. with
+   # `columns => ["user_defined_1", "user_defined_2"]` and four columns of
+   # data, the fields become "user_defined_1", "user_defined_2", "column3",
+   # "column4".
+   config :columns, :validate => :array, :default => []
+
+   # Define the column separator value. If this is not specified, the default
+   # is a comma ','.
+   # Optional.
+   config :separator, :validate => :string, :default => ","
+
+   # Define the character used to quote CSV fields. If this is not specified,
+   # the default is a double quote '"'.
+   # Optional.
+   config :quote_char, :validate => :string, :default => '"'
+
+   # Define the target field for placing the parsed data.
+   # Defaults to writing to the root of the event.
+   config :target, :validate => :string
+
+   public
+   def register
+     # Nothing to do here
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     @logger.debug("Running csv filter", :event => event)
+
+     if event[@source]
+       if event[@source].is_a?(String)
+         event[@source] = [event[@source]]
+       end
+
+       if event[@source].length > 1
+         @logger.warn("csv filter only works on fields of length 1",
+                      :source => @source, :value => event[@source],
+                      :event => event)
+         return
+       end
+
+       raw = event[@source].first
+       begin
+         values = CSV.parse_line(raw, :col_sep => @separator, :quote_char => @quote_char)
+
+         if @target.nil?
+           # Default is to write to the root of the event.
+           dest = event
+         else
+           dest = event[@target] ||= {}
+         end
+
+         values.each_index do |i|
+           field_name = @columns[i] || "column#{i + 1}"
+           dest[field_name] = values[i]
+         end
+
+         filter_matched(event)
+       rescue => e
+         event.tag "_csvparsefailure"
+         @logger.warn("Trouble parsing csv", :source => @source, :raw => raw,
+                      :exception => e)
+         return
+       end # begin
+     end # if event
+
+     @logger.debug("Event after csv filter", :event => event)
+   end # def filter
+ end # class LogStash::Filters::CSV
+
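
Putting these options together, a representative (hypothetical) pipeline snippet exercising the filter might look like the following; the field and column names are illustrative only:

    filter {
      csv {
        source    => "payload"            # field holding the raw CSV line ("message" is the default)
        separator => ";"                  # parse semicolon-delimited data
        columns   => ["user", "action"]   # name the first two columns; a third would become "column3"
        target    => "csv_data"           # nest results under this field instead of the event root
      }
    }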
data/logstash-filter-csv.gemspec ADDED
@@ -0,0 +1,26 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-filter-csv'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.description   = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+ end
+
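
With this spec in place, the gem can be built with standard RubyGems tooling; a minimal sketch using the RubyGems API (equivalent to running `gem build logstash-filter-csv.gemspec`):

    require "rubygems/package"

    # Load the spec and build the .gem archive; the output filename
    # (logstash-filter-csv-0.1.0.gem) follows from s.name and s.version.
    spec = Gem::Specification.load("logstash-filter-csv.gemspec")
    Gem::Package.build(spec)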
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
+
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "zlib"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename(URI(url).path)
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # response.code is a String; fail on anything other than success.
+       # (The original check was inverted and compared against Integers,
+       # so it could never fire.)
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || -1).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
+           end
+         end
+       end
+       $stdout.write("\r      \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby.
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+   outpath = file.gsub('.gz', '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   ensure
+     # Close the reader even when extraction fails.
+     tgz.close
+   end
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         if !file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         # Assign the flattened name for every extracted entry, not only
+         # when an explicit file list is given (the original left 'out'
+         # unset in that case).
+         out = entry.full_name.split("/").last
+         File.join('vendor', out)
+       end
+     elsif download =~ /\.gz/
+       ungz(download)
+     end
+   end
+ end
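
The "vendor" task is driven by the `@files` array declared (empty) in the Rakefile; each entry is expected to be a hash with 'url' and 'sha1' keys plus an optional 'files' allow-list of paths inside the tarball. This plugin vendors nothing, so the following entry is purely hypothetical, with placeholder URL and digest:

    # Hypothetical @files entry; this plugin actually ships with @files = [].
    @files = [
      {
        'url'   => 'https://example.com/archives/types-1.0.tar.gz',   # placeholder
        'sha1'  => '0000000000000000000000000000000000000000',        # placeholder
        'files' => ['/types.db']  # optional: extract only these paths
      }
    ]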
data/spec/filters/csv_spec.rb ADDED
@@ -0,0 +1,175 @@
+ # encoding: utf-8
+
+ require "spec_helper"
+ require "logstash/filters/csv"
+
+ describe LogStash::Filters::CSV do
+
+   describe "all defaults" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv { }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "custom separator" do
+     config <<-CONFIG
+       filter {
+         csv {
+           separator => ";"
+         }
+       }
+     CONFIG
+
+     sample "big,bird;sesame street" do
+       insist { subject["column1"] } == "big,bird"
+       insist { subject["column2"] } == "sesame street"
+     end
+   end
+
+   describe "custom quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "'"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,'sesame street'" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "default quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+         }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame, street"' do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame, street"
+     end
+   end
+
+   describe "null quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "\x00"
+         }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame" street' do
+       insist { subject["column1"] } == 'big'
+       insist { subject["column2"] } == 'bird'
+       insist { subject["column3"] } == '"sesame" street'
+     end
+   end
+
+   describe "given columns" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["first", "last", "address"]
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["first"] } == "big"
+       insist { subject["last"] } == "bird"
+       insist { subject["address"] } == "sesame street"
+     end
+   end
+
+   describe "parse csv with more data than defined column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["custom1", "custom2"]
+         }
+       }
+     CONFIG
+
+     sample "val1,val2,val3" do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["column3"] } == "val3"
+     end
+   end
+
+   describe "parse csv from a given source with column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           source  => "datafield"
+           columns => ["custom1", "custom2", "custom3"]
+         }
+       }
+     CONFIG
+
+     sample("datafield" => "val1,val2,val3") do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["custom3"] } == "val3"
+     end
+   end
+
+   describe "given target" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+   describe "given target and source" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           source => "datain"
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample("datain" => "big,bird,sesame street") do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+ end
metadata ADDED
@@ -0,0 +1,76 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-filter-csv
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-02 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ description: The CSV filter takes an event field containing CSV data, parses it, and
+   stores it as individual fields (can optionally specify the names).
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/filters/csv.rb
+ - logstash-filter-csv.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/filters/csv_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: filter
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: The CSV filter takes an event field containing CSV data, parses it, and stores
+   it as individual fields (can optionally specify the names).
+ test_files:
+ - spec/filters/csv_spec.rb