logstash-filter-csv 2.0.2 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b2ece5f8729158340a06b5b6cffdc52d04e37282
4
- data.tar.gz: d60433bf57b489f80c8a9ed7e627a2d9c5587151
3
+ metadata.gz: 74d72358f3baf0fb616273e5b0c3d6dac3784aa2
4
+ data.tar.gz: dc732a6835ae9b9c37a647d9e0541ffdc8f8d130
5
5
  SHA512:
6
- metadata.gz: ead9e3a32ba1e4fefcda44cef708ff534a88712f11d5d4e813869211fac97058ea5fb3b6ba61207c49c10ae51ee255fa109afd457524d3215076feb045b0e9a0
7
- data.tar.gz: 5d59f9e644b5ae002da5962325f16ff54051673e5ea20618f51cf9cccd0dd9b060be5d8ddc59f469d3f507a29ba32f2990615b4b2144389622dd066e25b3cdb8
6
+ metadata.gz: caea7cbc014ed27d8e207e257558e086e170a20c120551e11cde96cdbe58424b9b2ce7ed7516a7f0292d8b962abad434b4592a9393e23a60640b987e6f22e0d6
7
+ data.tar.gz: 9d04652e8989ab10a3d5ec11961fcf53a0ee666557065a6f4e6a017399ef9e8b30e34b1302dd06f55f14673cc5614687f050fb6583ee6f496822739a9884ac1a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 2.1.0
2
+ - Added support for not parsing columns without a defined header.
3
+ - Added support for dropping columns that have no value
4
+ - Added support for type conversion within the filter
5
+ - Fix unnecessary source field mutation. Fixes #18
6
+ - Refactored specs to avoid using sample and insist in favor of rspec3
7
+ helper methods.
8
+
1
9
  ## 2.0.0
2
10
  - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
3
11
  instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Logstash Plugin
2
2
 
3
+ [![Build
4
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-csv-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-csv-unit/)
5
+
3
6
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
7
 
5
8
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
@@ -36,37 +36,49 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
36
36
  # Defaults to writing to the root of the event.
37
37
  config :target, :validate => :string
38
38
 
39
- public
40
- def register
39
+ # Define whether column names should be autogenerated or not.
40
+ # Defaults to true. If set to false, columns not having a header specified will not be parsed.
41
+ config :autogenerate_column_names, :validate => :boolean, :default => true
41
42
 
42
- # Nothing to do here
43
+ # Define whether empty columns should be skipped.
44
+ # Defaults to false. If set to true, columns containing no value will not get set.
45
+ config :skip_empty_columns, :validate => :boolean, :default => false
43
46
 
44
- end # def register
47
+ # Define a set of datatype conversions to be applied to columns.
48
+ # Possible conversions are integer, float, date, date_time, boolean
49
+ #
50
+ # # Example:
51
+ # [source,ruby]
52
+ # filter {
53
+ # csv {
54
+ # convert => { "column1" => "integer", "column2" => "boolean" }
55
+ # }
56
+ # }
57
+ config :convert, :validate => :hash, :default => {}
45
58
 
46
- public
47
- def filter(event)
48
-
49
59
 
50
- @logger.debug("Running csv filter", :event => event)
60
+ ##
61
+ # List of valid conversion types used for the convert option
62
+ ##
63
+ VALID_CONVERT_TYPES = [ "integer", "float", "date", "date_time", "boolean" ].freeze
51
64
 
52
- matches = 0
53
65
 
54
- if event[@source]
55
- if event[@source].is_a?(String)
56
- event[@source] = [event[@source]]
66
+ def register
67
+ # validate conversion types to be the valid ones.
68
+ @convert.each_pair do |column, type|
69
+ if !VALID_CONVERT_TYPES.include?(type)
70
+ raise LogStash::ConfigurationError, "#{type} is not a valid conversion type."
57
71
  end
72
+ end
73
+ end # def register
58
74
 
59
- if event[@source].length > 1
60
- @logger.warn("csv filter only works on fields of length 1",
61
- :source => @source, :value => event[@source],
62
- :event => event)
63
- return
64
- end
75
+ def filter(event)
76
+ @logger.debug("Running csv filter", :event => event)
65
77
 
66
- raw = event[@source].first
78
+ if event[@source]
79
+ source = event[@source].clone
67
80
  begin
68
- values = CSV.parse_line(raw, :col_sep => @separator, :quote_char => @quote_char)
69
-
81
+ values = CSV.parse_line(source, :col_sep => @separator, :quote_char => @quote_char)
70
82
  if @target.nil?
71
83
  # Default is to write to the root of the event.
72
84
  dest = event
@@ -75,15 +87,21 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
75
87
  end
76
88
 
77
89
  values.each_index do |i|
78
- field_name = @columns[i] || "column#{i+1}"
79
- dest[field_name] = values[i]
90
+ if !(@skip_empty_columns && (values[i].nil? || values[i].empty?))
91
+ if !ignore_field?(i)
92
+ field_name = @columns[i] ? @columns[i] : "column#{i+1}"
93
+ dest[field_name] = if should_transform?(field_name)
94
+ transform(field_name, values[i])
95
+ else
96
+ values[i]
97
+ end
98
+ end
99
+ end
80
100
  end
81
-
82
101
  filter_matched(event)
83
102
  rescue => e
84
103
  event.tag "_csvparsefailure"
85
- @logger.warn("Trouble parsing csv", :source => @source, :raw => raw,
86
- :exception => e)
104
+ @logger.warn("Trouble parsing csv", :field => @source, :source => source, :exception => e)
87
105
  return
88
106
  end # begin
89
107
  end # if event
@@ -92,5 +110,44 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
92
110
 
93
111
  end # def filter
94
112
 
113
+ private
114
+
115
+ def ignore_field?(index)
116
+ !@columns[index] && !@autogenerate_column_names
117
+ end
118
+
119
+ def should_transform?(field_name)
120
+ !@convert[field_name].nil?
121
+ end
122
+
123
+ def transform(field_name, value)
124
+ transformation = @convert[field_name].to_sym
125
+ converters[transformation].call(value)
126
+ end
127
+
128
+ def converters
129
+ @converters ||= {
130
+ :integer => lambda do |value|
131
+ CSV::Converters[:integer].call(value)
132
+ end,
133
+ :float => lambda do |value|
134
+ CSV::Converters[:float].call(value)
135
+
136
+ end,
137
+ :date => lambda do |value|
138
+ CSV::Converters[:date].call(value)
139
+
140
+ end,
141
+ :date_time => lambda do |value|
142
+ CSV::Converters[:date_time].call(value)
143
+ end,
144
+ :boolean => lambda do |value|
145
+ value = value.strip.downcase
146
+ return false if value == "false"
147
+ return true if value == "true"
148
+ return value
149
+ end
150
+ }
151
+ end
95
152
  end # class LogStash::Filters::Csv
96
153
 
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-csv'
4
- s.version = '2.0.2'
4
+ s.version = '2.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -1,175 +1,249 @@
1
1
  # encoding: utf-8
2
-
3
2
  require "logstash/devutils/rspec/spec_helper"
4
3
  require "logstash/filters/csv"
5
4
 
6
5
  describe LogStash::Filters::CSV do
7
6
 
8
- describe "all defaults" do
9
- # The logstash config goes here.
10
- # At this time, only filters are supported.
11
- config <<-CONFIG
12
- filter {
13
- csv { }
14
- }
15
- CONFIG
16
-
17
- sample "big,bird,sesame street" do
18
- insist { subject["column1"] } == "big"
19
- insist { subject["column2"] } == "bird"
20
- insist { subject["column3"] } == "sesame street"
21
- end
22
- end
7
+ subject(:plugin) { LogStash::Filters::CSV.new(config) }
8
+ let(:config) { Hash.new }
23
9
 
24
- describe "custom separator" do
25
- config <<-CONFIG
26
- filter {
27
- csv {
28
- separator => ";"
29
- }
30
- }
31
- CONFIG
32
-
33
- sample "big,bird;sesame street" do
34
- insist { subject["column1"] } == "big,bird"
35
- insist { subject["column2"] } == "sesame street"
36
- end
37
- end
10
+ let(:doc) { "" }
11
+ let(:event) { LogStash::Event.new("message" => doc) }
38
12
 
39
- describe "custom quote char" do
40
- config <<-CONFIG
41
- filter {
42
- csv {
43
- quote_char => "'"
44
- }
45
- }
46
- CONFIG
47
-
48
- sample "big,bird,'sesame street'" do
49
- insist { subject["column1"] } == "big"
50
- insist { subject["column2"] } == "bird"
51
- insist { subject["column3"] } == "sesame street"
52
- end
53
- end
13
+ describe "registration" do
14
+
15
+ context "when using invalid data types" do
16
+ let(:config) do
17
+ { "convert" => { "custom1" => "integer", "custom3" => "wrong_type" },
18
+ "columns" => ["custom1", "custom2", "custom3"] }
19
+ end
54
20
 
55
- describe "default quote char" do
56
- config <<-CONFIG
57
- filter {
58
- csv {
59
- }
60
- }
61
- CONFIG
62
-
63
- sample 'big,bird,"sesame, street"' do
64
- insist { subject["column1"] } == "big"
65
- insist { subject["column2"] } == "bird"
66
- insist { subject["column3"] } == "sesame, street"
21
+ it "should register" do
22
+ input = LogStash::Plugin.lookup("filter", "csv").new(config)
23
+ expect {input.register}.to raise_error
24
+ end
67
25
  end
68
26
  end
69
- describe "null quote char" do
70
- config <<-CONFIG
71
- filter {
72
- csv {
73
- quote_char => "\x00"
74
- }
75
- }
76
- CONFIG
77
-
78
- sample 'big,bird,"sesame" street' do
79
- insist { subject["column1"] } == 'big'
80
- insist { subject["column2"] } == 'bird'
81
- insist { subject["column3"] } == '"sesame" street'
27
+
28
+ describe "receive" do
29
+
30
+ before(:each) do
31
+ plugin.register
82
32
  end
83
- end
84
33
 
85
- describe "given columns" do
86
- # The logstash config goes here.
87
- # At this time, only filters are supported.
88
- config <<-CONFIG
89
- filter {
90
- csv {
91
- columns => ["first", "last", "address" ]
92
- }
93
- }
94
- CONFIG
95
-
96
- sample "big,bird,sesame street" do
97
- insist { subject["first"] } == "big"
98
- insist { subject["last"] } == "bird"
99
- insist { subject["address"] } == "sesame street"
34
+ describe "all defaults" do
35
+
36
+ let(:config) { Hash.new }
37
+
38
+ let(:doc) { "big,bird,sesame street" }
39
+
40
+ it "extract all the values" do
41
+ plugin.filter(event)
42
+ expect(event["column1"]).to eq("big")
43
+ expect(event["column2"]).to eq("bird")
44
+ expect(event["column3"]).to eq("sesame street")
45
+ end
46
+
47
+ it "should not mutate the source field" do
48
+ plugin.filter(event)
49
+ expect(event["message"]).to be_kind_of(String)
50
+ end
100
51
  end
101
- end
102
52
 
103
- describe "parse csv with more data than defined column names" do
104
- config <<-CONFIG
105
- filter {
106
- csv {
107
- columns => ["custom1", "custom2"]
108
- }
109
- }
110
- CONFIG
111
-
112
- sample "val1,val2,val3" do
113
- insist { subject["custom1"] } == "val1"
114
- insist { subject["custom2"] } == "val2"
115
- insist { subject["column3"] } == "val3"
53
+ describe "custom separator" do
54
+ let(:doc) { "big,bird;sesame street" }
55
+
56
+ let(:config) do
57
+ { "separator" => ";" }
58
+ end
59
+ it "extract all the values" do
60
+ plugin.filter(event)
61
+ expect(event["column1"]).to eq("big,bird")
62
+ expect(event["column2"]).to eq("sesame street")
63
+ end
116
64
  end
117
- end
118
65
 
66
+ describe "quote char" do
67
+ let(:doc) { "big,bird,'sesame street'" }
68
+
69
+ let(:config) do
70
+ { "quote_char" => "'"}
71
+ end
72
+
73
+ it "extract all the values" do
74
+ plugin.filter(event)
75
+ expect(event["column1"]).to eq("big")
76
+ expect(event["column2"]).to eq("bird")
77
+ expect(event["column3"]).to eq("sesame street")
78
+ end
79
+
80
+ context "using the default one" do
81
+ let(:doc) { 'big,bird,"sesame, street"' }
82
+ let(:config) { Hash.new }
83
+
84
+ it "extract all the values" do
85
+ plugin.filter(event)
86
+ expect(event["column1"]).to eq("big")
87
+ expect(event["column2"]).to eq("bird")
88
+ expect(event["column3"]).to eq("sesame, street")
89
+ end
90
+ end
91
+
92
+ context "using a null" do
93
+ let(:doc) { 'big,bird,"sesame" street' }
94
+ let(:config) do
95
+ { "quote_char" => "\x00" }
96
+ end
97
+
98
+ it "extract all the values" do
99
+ plugin.filter(event)
100
+ expect(event["column1"]).to eq("big")
101
+ expect(event["column2"]).to eq("bird")
102
+ expect(event["column3"]).to eq('"sesame" street')
103
+ end
104
+ end
105
+ end
119
106
 
120
- describe "parse csv from a given source with column names" do
121
- config <<-CONFIG
122
- filter {
123
- csv {
124
- source => "datafield"
125
- columns => ["custom1", "custom2", "custom3"]
126
- }
127
- }
128
- CONFIG
129
-
130
- sample("datafield" => "val1,val2,val3") do
131
- insist { subject["custom1"] } == "val1"
132
- insist { subject["custom2"] } == "val2"
133
- insist { subject["custom3"] } == "val3"
107
+ describe "given column names" do
108
+ let(:doc) { "big,bird,sesame street" }
109
+ let(:config) do
110
+ { "columns" => ["first", "last", "address" ] }
111
+ end
112
+
113
+ it "extract all the values" do
114
+ plugin.filter(event)
115
+ expect(event["first"]).to eq("big")
116
+ expect(event["last"]).to eq("bird")
117
+ expect(event["address"]).to eq("sesame street")
118
+ end
119
+
120
+ context "parse csv without autogeneration of names" do
121
+
122
+ let(:doc) { "val1,val2,val3" }
123
+ let(:config) do
124
+ { "autogenerate_column_names" => false,
125
+ "columns" => ["custom1", "custom2"] }
126
+ end
127
+
128
+ it "extract all the values" do
129
+ plugin.filter(event)
130
+ expect(event["custom1"]).to eq("val1")
131
+ expect(event["custom2"]).to eq("val2")
132
+ expect(event["column3"]).to be_falsey
133
+ end
134
+ end
135
+
136
+ context "parse csv skipping empty columns" do
137
+
138
+ let(:doc) { "val1,,val3" }
139
+
140
+ let(:config) do
141
+ { "skip_empty_columns" => true,
142
+ "source" => "datafield",
143
+ "columns" => ["custom1", "custom2", "custom3"] }
144
+ end
145
+
146
+ let(:event) { LogStash::Event.new("datafield" => doc) }
147
+
148
+ it "extract all the values" do
149
+ plugin.filter(event)
150
+ expect(event["custom1"]).to eq("val1")
151
+ expect(event["custom2"]).to be_falsey
152
+ expect(event["custom3"]).to eq("val3")
153
+ end
154
+ end
155
+
156
+ context "parse csv with more data than defined" do
157
+ let(:doc) { "val1,val2,val3" }
158
+ let(:config) do
159
+ { "columns" => ["custom1", "custom2"] }
160
+ end
161
+
162
+ it "extract all the values" do
163
+ plugin.filter(event)
164
+ expect(event["custom1"]).to eq("val1")
165
+ expect(event["custom2"]).to eq("val2")
166
+ expect(event["column3"]).to eq("val3")
167
+ end
168
+ end
169
+
170
+ context "parse csv from a given source" do
171
+ let(:doc) { "val1,val2,val3" }
172
+ let(:config) do
173
+ { "source" => "datafield",
174
+ "columns" => ["custom1", "custom2", "custom3"] }
175
+ end
176
+ let(:event) { LogStash::Event.new("datafield" => doc) }
177
+
178
+ it "extract all the values" do
179
+ plugin.filter(event)
180
+ expect(event["custom1"]).to eq("val1")
181
+ expect(event["custom2"]).to eq("val2")
182
+ expect(event["custom3"]).to eq("val3")
183
+ end
184
+ end
134
185
  end
135
- end
136
186
 
137
- describe "given target" do
138
- # The logstash config goes here.
139
- # At this time, only filters are supported.
140
- config <<-CONFIG
141
- filter {
142
- csv {
143
- target => "data"
144
- }
145
- }
146
- CONFIG
147
-
148
- sample "big,bird,sesame street" do
149
- insist { subject["data"]["column1"] } == "big"
150
- insist { subject["data"]["column2"] } == "bird"
151
- insist { subject["data"]["column3"] } == "sesame street"
187
+ describe "given target" do
188
+ let(:config) do
189
+ { "target" => "data" }
190
+ end
191
+ let(:doc) { "big,bird,sesame street" }
192
+ let(:event) { LogStash::Event.new("message" => doc) }
193
+
194
+ it "extract all the values" do
195
+ plugin.filter(event)
196
+ expect(event["data"]["column1"]).to eq("big")
197
+ expect(event["data"]["column2"]).to eq("bird")
198
+ expect(event["data"]["column3"]).to eq("sesame street")
199
+ end
200
+
201
+ context "when having also source" do
202
+ let(:config) do
203
+ { "source" => "datain",
204
+ "target" => "data" }
205
+ end
206
+ let(:event) { LogStash::Event.new("datain" => doc) }
207
+ let(:doc) { "big,bird,sesame street" }
208
+
209
+ it "extract all the values" do
210
+ plugin.filter(event)
211
+ expect(event["data"]["column1"]).to eq("big")
212
+ expect(event["data"]["column2"]).to eq("bird")
213
+ expect(event["data"]["column3"]).to eq("sesame street")
214
+ end
215
+ end
152
216
  end
153
- end
154
217
 
155
- describe "given target and source" do
156
- # The logstash config goes here.
157
- # At this time, only filters are supported.
158
- config <<-CONFIG
159
- filter {
160
- csv {
161
- source => "datain"
162
- target => "data"
163
- }
164
- }
165
- CONFIG
166
-
167
- sample("datain" => "big,bird,sesame street") do
168
- insist { subject["data"]["column1"] } == "big"
169
- insist { subject["data"]["column2"] } == "bird"
170
- insist { subject["data"]["column3"] } == "sesame street"
218
+ describe "using field conversion" do
219
+
220
+ let(:config) do
221
+ { "convert" => { "column1" => "integer", "column3" => "boolean" } }
222
+ end
223
+ let(:doc) { "1234,bird,false" }
224
+ let(:event) { LogStash::Event.new("message" => doc) }
225
+
226
+ it "get converted values to the expected type" do
227
+ plugin.filter(event)
228
+ expect(event["column1"]).to eq(1234)
229
+ expect(event["column2"]).to eq("bird")
230
+ expect(event["column3"]).to eq(false)
231
+ end
232
+
233
+ context "when using column names" do
234
+
235
+ let(:config) do
236
+ { "convert" => { "custom1" => "integer", "custom3" => "boolean" },
237
+ "columns" => ["custom1", "custom2", "custom3"] }
238
+ end
239
+
240
+ it "get converted values to the expected type" do
241
+ plugin.filter(event)
242
+ expect(event["custom1"]).to eq(1234)
243
+ expect(event["custom2"]).to eq("bird")
244
+ expect(event["custom3"]).to eq(false)
245
+ end
246
+ end
171
247
  end
172
248
  end
173
-
174
-
175
249
  end
metadata CHANGED
@@ -1,17 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2015-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: logstash-core
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - '>='
17
18
  - !ruby/object:Gem::Version
@@ -19,10 +20,7 @@ dependencies:
19
20
  - - <
20
21
  - !ruby/object:Gem::Version
21
22
  version: 3.0.0
22
- name: logstash-core
23
- prerelease: false
24
- type: :runtime
25
- version_requirements: !ruby/object:Gem::Requirement
23
+ requirement: !ruby/object:Gem::Requirement
26
24
  requirements:
27
25
  - - '>='
28
26
  - !ruby/object:Gem::Version
@@ -30,20 +28,22 @@ dependencies:
30
28
  - - <
31
29
  - !ruby/object:Gem::Version
32
30
  version: 3.0.0
31
+ prerelease: false
32
+ type: :runtime
33
33
  - !ruby/object:Gem::Dependency
34
- requirement: !ruby/object:Gem::Requirement
34
+ name: logstash-devutils
35
+ version_requirements: !ruby/object:Gem::Requirement
35
36
  requirements:
36
37
  - - '>='
37
38
  - !ruby/object:Gem::Version
38
39
  version: '0'
39
- name: logstash-devutils
40
- prerelease: false
41
- type: :development
42
- version_requirements: !ruby/object:Gem::Requirement
40
+ requirement: !ruby/object:Gem::Requirement
43
41
  requirements:
44
42
  - - '>='
45
43
  - !ruby/object:Gem::Version
46
44
  version: '0'
45
+ prerelease: false
46
+ type: :development
47
47
  description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
48
48
  email: info@elastic.co
49
49
  executables: []