logstash-filter-csv 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,15 @@
+ ---
+ !binary "U0hBMQ==":
+   metadata.gz: !binary |-
+     OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==
+   data.tar.gz: !binary |-
+     OTgyOGNiMGU4ZjcxNjUzZGQxNGNiYmQwZDQ5YWRjODIxNzM2N2NkZA==
+ SHA512:
+   metadata.gz: !binary |-
+     YjhhNzg3YmViMjFiOWQ0OTE5NmI5MWViOTUxMDE0MWE3MTBlNWEwMmY1NjBi
+     NzU1MWNjOGQ4NWJjNDNiMDUzZTkyOWUyYTZmZmUwNjY5MzFiYzliZjc1OGNl
+     MmU2MDUwMDhlYmM4NWQ4MGRmZTdlZmEzNzMzZWRhYzZlMTA4MjE=
+   data.tar.gz: !binary |-
+     OGFlNWM4NWNkZWE1MzUxY2QxMjQ0YmM0NWQzNDVmMjhlMTBmOTgwOWVmNjkx
+     OGRkMmViN2ViZmYzODM5NThkZDBjNWFmYTU1MmY1ZjJkMzVmOTI3MmYwYmQ2
+     N2ExNGQ1ZTE2ODM5MTAzYWU3YWE5NDViZWM5NmQ2MDQ0MTExMDA=
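
For reference, each !binary value in checksums.yaml is a Base64 wrapping of the hex digest of the named archive member. A minimal verification sketch in Ruby, assuming the .gem has been unpacked (e.g. with `tar -xf logstash-filter-csv-0.1.0.gem`) so that metadata.gz sits in the working directory:

    require "base64"
    require "digest"

    # The Base64 value decodes to the 40-character hex SHA1 digest
    # recorded above for metadata.gz.
    expected = Base64.decode64("OTY1ZjIyMmUwNDhkYjk2NmMyYTM0NDA2Mzc5ZDJhMDczM2ZiNTBkZQ==")
    actual   = Digest::SHA1.file("metadata.gz").hexdigest

    abort "checksum mismatch for metadata.gz" unless actual == expected
    puts "metadata.gz OK (SHA1 #{actual})"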
data/.gitignore ADDED
@@ -0,0 +1,4 @@
+ *.gem
+ Gemfile.lock
+ .bundle
+ vendor
data/Gemfile ADDED
@@ -0,0 +1,3 @@
+ source 'http://rubygems.org'
+ gem 'rake'
+ gem 'gem_publisher'
data/Rakefile ADDED
@@ -0,0 +1,6 @@
+ @files = []
+
+ task :default do
+   system("rake -T")
+ end
+
data/lib/logstash/filters/csv.rb ADDED
@@ -0,0 +1,97 @@
+ # encoding: utf-8
+ require "logstash/filters/base"
+ require "logstash/namespace"
+
+ require "csv"
+
+ # The CSV filter takes an event field containing CSV data, parses it,
+ # and stores it as individual fields (you can optionally specify their names).
+ # This filter can also parse data with any separator, not just commas.
+ class LogStash::Filters::CSV < LogStash::Filters::Base
+   config_name "csv"
+   milestone 2
+
+   # The CSV data in the value of the `source` field will be expanded into a
+   # data structure.
+   config :source, :validate => :string, :default => "message"
+
+   # Define a list of column names (in the order they appear in the CSV,
+   # as if it were a header line). If `columns` is not configured, or there
+   # are not enough columns specified, the default column names are
+   # "column1", "column2", etc. If there are more columns in the data than
+   # in this column list, the extras are auto-numbered:
+   # (e.g. "user_defined_1", "user_defined_2", "column3", "column4", etc.)
+   config :columns, :validate => :array, :default => []
+
+   # Define the column separator value. If this is not specified, the default
+   # is a comma ','.
+   # Optional.
+   config :separator, :validate => :string, :default => ","
+
+   # Define the character used to quote CSV fields. If this is not specified,
+   # the default is a double quote '"'.
+   # Optional.
+   config :quote_char, :validate => :string, :default => '"'
+
+   # Define the target field for placing the parsed data.
+   # Defaults to writing to the root of the event.
+   config :target, :validate => :string
+
+   public
+   def register
+     # Nothing to do here
+   end # def register
+
+   public
+   def filter(event)
+     return unless filter?(event)
+
+     @logger.debug("Running csv filter", :event => event)
+
+     if event[@source]
+       if event[@source].is_a?(String)
+         event[@source] = [event[@source]]
+       end
+
+       if event[@source].length > 1
+         @logger.warn("csv filter only works on fields of length 1",
+                      :source => @source, :value => event[@source],
+                      :event => event)
+         return
+       end
+
+       raw = event[@source].first
+       begin
+         values = CSV.parse_line(raw, :col_sep => @separator, :quote_char => @quote_char)
+
+         if @target.nil?
+           # Default is to write to the root of the event.
+           dest = event
+         else
+           dest = event[@target] ||= {}
+         end
+
+         values.each_index do |i|
+           field_name = @columns[i] || "column#{i + 1}"
+           dest[field_name] = values[i]
+         end
+
+         filter_matched(event)
+       rescue => e
+         event.tag "_csvparsefailure"
+         @logger.warn("Trouble parsing csv", :source => @source, :raw => raw,
+                      :exception => e)
+         return
+       end # begin
+     end # if event
+
+     @logger.debug("Event after csv filter", :event => event)
+   end # def filter
+ end # class LogStash::Filters::CSV
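
To make the column handling above concrete, here is a standalone Ruby sketch of what the filter does to one event's source string (hypothetical values; this mirrors the loop in `filter` rather than running inside Logstash):

    require "csv"

    columns = ["first", "last"]   # as configured via `columns`
    values  = CSV.parse_line("big,bird,sesame street",
                             :col_sep => ",", :quote_char => '"')

    dest = {}
    values.each_index do |i|
      # Named columns win; extra values fall back to auto-numbered "columnN"
      dest[columns[i] || "column#{i + 1}"] = values[i]
    end

    p dest  # => {"first"=>"big", "last"=>"bird", "column3"=>"sesame street"}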
data/logstash-filter-csv.gemspec ADDED
@@ -0,0 +1,26 @@
+ Gem::Specification.new do |s|
+
+   s.name          = 'logstash-filter-csv'
+   s.version       = '0.1.0'
+   s.licenses      = ['Apache License (2.0)']
+   s.summary       = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.description   = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
+   s.authors       = ["Elasticsearch"]
+   s.email         = 'richard.pijnenburg@elasticsearch.com'
+   s.homepage      = "http://logstash.net/"
+   s.require_paths = ["lib"]
+
+   # Files
+   s.files = `git ls-files`.split($\) + ::Dir.glob('vendor/*')
+
+   # Tests
+   s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+   # Special flag to let us know this is actually a logstash plugin
+   s.metadata = { "logstash_plugin" => "true", "group" => "filter" }
+
+   # Gem dependencies
+   s.add_runtime_dependency 'logstash', '>= 1.4.0', '< 2.0.0'
+
+ end
data/rakelib/publish.rake ADDED
@@ -0,0 +1,9 @@
+ require "gem_publisher"
+
+ desc "Publish gem to RubyGems.org"
+ task :publish_gem do |t|
+   gem_file = Dir.glob(File.expand_path('../*.gemspec', File.dirname(__FILE__))).first
+   gem = GemPublisher.publish_if_updated(gem_file, :rubygems)
+   puts "Published #{gem}" if gem
+ end
data/rakelib/vendor.rake ADDED
@@ -0,0 +1,169 @@
+ require "net/http"
+ require "uri"
+ require "digest/sha1"
+
+ def vendor(*args)
+   return File.join("vendor", *args)
+ end
+
+ directory "vendor/" => ["vendor"] do |task, args|
+   mkdir task.name
+ end
+
+ def fetch(url, sha1, output)
+   puts "Downloading #{url}"
+   actual_sha1 = download(url, output)
+
+   if actual_sha1 != sha1
+     fail "SHA1 does not match (expected '#{sha1}' but got '#{actual_sha1}')"
+   end
+ end # def fetch
+
+ def file_fetch(url, sha1)
+   filename = File.basename(URI(url).path)
+   output = "vendor/#{filename}"
+   task output => [ "vendor/" ] do
+     begin
+       actual_sha1 = file_sha1(output)
+       if actual_sha1 != sha1
+         fetch(url, sha1, output)
+       end
+     rescue Errno::ENOENT
+       fetch(url, sha1, output)
+     end
+   end.invoke
+
+   return output
+ end
+
+ def file_sha1(path)
+   digest = Digest::SHA1.new
+   fd = File.new(path, "r")
+   while true
+     begin
+       digest << fd.sysread(16384)
+     rescue EOFError
+       break
+     end
+   end
+   return digest.hexdigest
+ ensure
+   fd.close if fd
+ end
+
+ def download(url, output)
+   uri = URI(url)
+   digest = Digest::SHA1.new
+   tmp = "#{output}.tmp"
+   Net::HTTP.start(uri.host, uri.port, :use_ssl => (uri.scheme == "https")) do |http|
+     request = Net::HTTP::Get.new(uri.path)
+     http.request(request) do |response|
+       # response.code is a String; abort unless we got a success or a
+       # redirect we tolerate.
+       fail "HTTP fetch failed for #{url}. #{response}" unless ["200", "301"].include?(response.code)
+       size = (response["content-length"] || 0).to_f
+       count = 0
+       File.open(tmp, "w") do |fd|
+         response.read_body do |chunk|
+           fd.write(chunk)
+           digest << chunk
+           if size > 0 && $stdout.tty?
+             count += chunk.bytesize
+             $stdout.write(sprintf("\r%0.2f%%", count / size * 100))
+           end
+         end
+       end
+       $stdout.write("\r \r") if $stdout.tty?
+     end
+   end
+
+   File.rename(tmp, output)
+
+   return digest.hexdigest
+ rescue SocketError => e
+   puts "Failure while downloading #{url}: #{e}"
+   raise
+ ensure
+   File.unlink(tmp) if File.exist?(tmp)
+ end # def download
+
+ def untar(tarball, &block)
+   require "archive/tar/minitar"
+   tgz = Zlib::GzipReader.new(File.open(tarball))
+   # Pull out typesdb
+   tar = Archive::Tar::Minitar::Input.open(tgz)
+   tar.each do |entry|
+     path = block.call(entry)
+     next if path.nil?
+     parent = File.dirname(path)
+
+     mkdir_p parent unless File.directory?(parent)
+
+     # Skip this file if the output file is the same size
+     if entry.directory?
+       mkdir path unless File.directory?(path)
+     else
+       entry_mode = entry.instance_eval { @mode } & 0777
+       if File.exist?(path)
+         stat = File.stat(path)
+         # TODO(sissel): Submit a patch to archive-tar-minitar upstream to
+         # expose headers in the entry.
+         entry_size = entry.instance_eval { @size }
+         # If file sizes are same, skip writing.
+         next if stat.size == entry_size && (stat.mode & 0777) == entry_mode
+       end
+       puts "Extracting #{entry.full_name} from #{tarball} #{entry_mode.to_s(8)}"
+       File.open(path, "w") do |fd|
+         # eof? check lets us skip empty files. Necessary because the API provided by
+         # Archive::Tar::Minitar::Reader::EntryStream only mostly acts like an
+         # IO object. Something about empty files in this EntryStream causes
+         # IO.copy_stream to throw "can't convert nil into String" on JRuby.
+         # TODO(sissel): File a bug about this.
+         while !entry.eof?
+           chunk = entry.read(16384)
+           fd.write(chunk)
+         end
+         #IO.copy_stream(entry, fd)
+       end
+       File.chmod(entry_mode, path)
+     end
+   end
+   tar.close
+   File.unlink(tarball) if File.file?(tarball)
+ end # def untar
+
+ def ungz(file)
+   outpath = file.sub(/\.gz$/, '')
+   tgz = Zlib::GzipReader.new(File.open(file))
+   begin
+     File.open(outpath, "w") do |out|
+       IO::copy_stream(tgz, out)
+     end
+     File.unlink(file)
+   rescue
+     File.unlink(outpath) if File.file?(outpath)
+     raise
+   end
+   tgz.close
+ end
+
+ desc "Process any vendor files required for this plugin"
+ task "vendor" do |task, args|
+   @files.each do |file|
+     download = file_fetch(file['url'], file['sha1'])
+     if download =~ /\.tar\.gz$/
+       prefix = download.gsub('.tar.gz', '').gsub('vendor/', '')
+       untar(download) do |entry|
+         # Default the output name so entries are still extracted when no
+         # explicit 'files' whitelist is given.
+         out = entry.full_name.split("/").last
+         unless file['files'].nil?
+           next unless file['files'].include?(entry.full_name.gsub(prefix, ''))
+         end
+         File.join('vendor', out)
+       end
+     elsif download =~ /\.gz$/
+       ungz(download)
+     end
+   end
+ end
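
The "vendor" task iterates `@files`, which the Rakefile above defines as an empty array. Each element is a hash whose keys can be inferred from the lookups in the task body: 'url', 'sha1', and an optional 'files' whitelist. A purely hypothetical entry for illustration (this gem vendors nothing, so `@files` stays empty):

    # Hypothetical example only; URL, digest, and paths are made up.
    @files = [
      {
        # Where to download the tarball from
        'url'   => "http://example.com/downloads/geoip-data.tar.gz",
        # Expected SHA1 of the download, verified by fetch()
        'sha1'  => "0123456789abcdef0123456789abcdef01234567",
        # Optional: extract only these paths (relative to the tarball prefix)
        'files' => ["/data/GeoLiteCity.dat"],
      },
    ]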
data/spec/filters/csv_spec.rb ADDED
@@ -0,0 +1,175 @@
+ # encoding: utf-8
+
+ require "spec_helper"
+ require "logstash/filters/csv"
+
+ describe LogStash::Filters::CSV do
+
+   describe "all defaults" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv { }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "custom separator" do
+     config <<-CONFIG
+       filter {
+         csv {
+           separator => ";"
+         }
+       }
+     CONFIG
+
+     sample "big,bird;sesame street" do
+       insist { subject["column1"] } == "big,bird"
+       insist { subject["column2"] } == "sesame street"
+     end
+   end
+
+   describe "custom quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "'"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,'sesame street'" do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame street"
+     end
+   end
+
+   describe "default quote char" do
+     config <<-CONFIG
+       filter {
+         csv { }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame, street"' do
+       insist { subject["column1"] } == "big"
+       insist { subject["column2"] } == "bird"
+       insist { subject["column3"] } == "sesame, street"
+     end
+   end
+
+   describe "null quote char" do
+     config <<-CONFIG
+       filter {
+         csv {
+           quote_char => "\x00"
+         }
+       }
+     CONFIG
+
+     sample 'big,bird,"sesame" street' do
+       insist { subject["column1"] } == 'big'
+       insist { subject["column2"] } == 'bird'
+       insist { subject["column3"] } == '"sesame" street'
+     end
+   end
+
+   describe "given columns" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["first", "last", "address"]
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["first"] } == "big"
+       insist { subject["last"] } == "bird"
+       insist { subject["address"] } == "sesame street"
+     end
+   end
+
+   describe "parse csv with more data than defined column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           columns => ["custom1", "custom2"]
+         }
+       }
+     CONFIG
+
+     sample "val1,val2,val3" do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["column3"] } == "val3"
+     end
+   end
+
+   describe "parse csv from a given source with column names" do
+     config <<-CONFIG
+       filter {
+         csv {
+           source  => "datafield"
+           columns => ["custom1", "custom2", "custom3"]
+         }
+       }
+     CONFIG
+
+     sample("datafield" => "val1,val2,val3") do
+       insist { subject["custom1"] } == "val1"
+       insist { subject["custom2"] } == "val2"
+       insist { subject["custom3"] } == "val3"
+     end
+   end
+
+   describe "given target" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample "big,bird,sesame street" do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+   describe "given target and source" do
+     # The logstash config goes here.
+     # At this time, only filters are supported.
+     config <<-CONFIG
+       filter {
+         csv {
+           source => "datain"
+           target => "data"
+         }
+       }
+     CONFIG
+
+     sample("datain" => "big,bird,sesame street") do
+       insist { subject["data"]["column1"] } == "big"
+       insist { subject["data"]["column2"] } == "bird"
+       insist { subject["data"]["column3"] } == "sesame street"
+     end
+   end
+
+ end
metadata ADDED
@@ -0,0 +1,76 @@
+ --- !ruby/object:Gem::Specification
+ name: logstash-filter-csv
+ version: !ruby/object:Gem::Version
+   version: 0.1.0
+ platform: ruby
+ authors:
+ - Elasticsearch
+ autorequire:
+ bindir: bin
+ cert_chain: []
+ date: 2014-11-02 00:00:00.000000000 Z
+ dependencies:
+ - !ruby/object:Gem::Dependency
+   name: logstash
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ! '>='
+       - !ruby/object:Gem::Version
+         version: 1.4.0
+     - - <
+       - !ruby/object:Gem::Version
+         version: 2.0.0
+ description: The CSV filter takes an event field containing CSV data, parses it, and
+   stores it as individual fields (can optionally specify the names).
+ email: richard.pijnenburg@elasticsearch.com
+ executables: []
+ extensions: []
+ extra_rdoc_files: []
+ files:
+ - .gitignore
+ - Gemfile
+ - Rakefile
+ - lib/logstash/filters/csv.rb
+ - logstash-filter-csv.gemspec
+ - rakelib/publish.rake
+ - rakelib/vendor.rake
+ - spec/filters/csv_spec.rb
+ homepage: http://logstash.net/
+ licenses:
+ - Apache License (2.0)
+ metadata:
+   logstash_plugin: 'true'
+   group: filter
+ post_install_message:
+ rdoc_options: []
+ require_paths:
+ - lib
+ required_ruby_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ required_rubygems_version: !ruby/object:Gem::Requirement
+   requirements:
+   - - ! '>='
+     - !ruby/object:Gem::Version
+       version: '0'
+ requirements: []
+ rubyforge_project:
+ rubygems_version: 2.4.1
+ signing_key:
+ specification_version: 4
+ summary: The CSV filter takes an event field containing CSV data, parses it, and stores
+   it as individual fields (can optionally specify the names).
+ test_files:
+ - spec/filters/csv_spec.rb