logstash-filter-csv 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b2ece5f8729158340a06b5b6cffdc52d04e37282
4
- data.tar.gz: d60433bf57b489f80c8a9ed7e627a2d9c5587151
3
+ metadata.gz: 74d72358f3baf0fb616273e5b0c3d6dac3784aa2
4
+ data.tar.gz: dc732a6835ae9b9c37a647d9e0541ffdc8f8d130
5
5
  SHA512:
6
- metadata.gz: ead9e3a32ba1e4fefcda44cef708ff534a88712f11d5d4e813869211fac97058ea5fb3b6ba61207c49c10ae51ee255fa109afd457524d3215076feb045b0e9a0
7
- data.tar.gz: 5d59f9e644b5ae002da5962325f16ff54051673e5ea20618f51cf9cccd0dd9b060be5d8ddc59f469d3f507a29ba32f2990615b4b2144389622dd066e25b3cdb8
6
+ metadata.gz: caea7cbc014ed27d8e207e257558e086e170a20c120551e11cde96cdbe58424b9b2ce7ed7516a7f0292d8b962abad434b4592a9393e23a60640b987e6f22e0d6
7
+ data.tar.gz: 9d04652e8989ab10a3d5ec11961fcf53a0ee666557065a6f4e6a017399ef9e8b30e34b1302dd06f55f14673cc5614687f050fb6583ee6f496822739a9884ac1a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 2.1.0
2
+ - Added support for not parsing columns without a defined header.
3
+ - Added support for dropping columns that have no value
4
+ - Added support for type conversion within the filter
5
+ - Fix unnecessary source field mutation. Fixes #18
6
+ - Refactored specs to avoid using sample and insist in favor of rspec3
7
+ helper methods.
8
+
1
9
  ## 2.0.0
2
10
  - Plugins were updated to follow the new shutdown semantic, this mainly allows Logstash to instruct input plugins to terminate gracefully,
3
11
  instead of using Thread.raise on the plugins' threads. Ref: https://github.com/elastic/logstash/pull/3895
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # Logstash Plugin
2
2
 
3
+ [![Build
4
+ Status](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-csv-unit/badge/icon)](http://build-eu-00.elastic.co/view/LS%20Plugins/view/LS%20Filters/job/logstash-plugin-filter-csv-unit/)
5
+
3
6
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
4
7
 
5
8
  It is fully free and fully open source. The license is Apache 2.0, meaning you are pretty much free to use it however you want in whatever way.
@@ -36,37 +36,49 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
36
36
  # Defaults to writing to the root of the event.
37
37
  config :target, :validate => :string
38
38
 
39
- public
40
- def register
39
+ # Define whether column names should be autogenerated or not.
40
+ # Defaults to true. If set to false, columns not having a header specified will not be parsed.
41
+ config :autogenerate_column_names, :validate => :boolean, :default => true
41
42
 
42
- # Nothing to do here
43
+ # Define whether empty columns should be skipped.
44
+ # Defaults to false. If set to true, columns containing no value will not get set.
45
+ config :skip_empty_columns, :validate => :boolean, :default => false
43
46
 
44
- end # def register
47
+ # Define a set of datatype conversions to be applied to columns.
48
+ # Possible conversions are integer, float, date, date_time, boolean
49
+ #
50
+ # # Example:
51
+ # [source,ruby]
52
+ # filter {
53
+ # csv {
54
+ # convert => { "column1" => "integer", "column2" => "boolean" }
55
+ # }
56
+ # }
57
+ config :convert, :validate => :hash, :default => {}
45
58
 
46
- public
47
- def filter(event)
48
-
49
59
 
50
- @logger.debug("Running csv filter", :event => event)
60
+ ##
61
+ # List of valid conversion types used for the convert option
62
+ ##
63
+ VALID_CONVERT_TYPES = [ "integer", "float", "date", "date_time", "boolean" ].freeze
51
64
 
52
- matches = 0
53
65
 
54
- if event[@source]
55
- if event[@source].is_a?(String)
56
- event[@source] = [event[@source]]
66
+ def register
67
+ # validate conversion types to be the valid ones.
68
+ @convert.each_pair do |column, type|
69
+ if !VALID_CONVERT_TYPES.include?(type)
70
+ raise LogStash::ConfigurationError, "#{type} is not a valid conversion type."
57
71
  end
72
+ end
73
+ end # def register
58
74
 
59
- if event[@source].length > 1
60
- @logger.warn("csv filter only works on fields of length 1",
61
- :source => @source, :value => event[@source],
62
- :event => event)
63
- return
64
- end
75
+ def filter(event)
76
+ @logger.debug("Running csv filter", :event => event)
65
77
 
66
- raw = event[@source].first
78
+ if event[@source]
79
+ source = event[@source].clone
67
80
  begin
68
- values = CSV.parse_line(raw, :col_sep => @separator, :quote_char => @quote_char)
69
-
81
+ values = CSV.parse_line(source, :col_sep => @separator, :quote_char => @quote_char)
70
82
  if @target.nil?
71
83
  # Default is to write to the root of the event.
72
84
  dest = event
@@ -75,15 +87,21 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
75
87
  end
76
88
 
77
89
  values.each_index do |i|
78
- field_name = @columns[i] || "column#{i+1}"
79
- dest[field_name] = values[i]
90
+ if !(@skip_empty_columns && (values[i].nil? || values[i].empty?))
91
+ if !ignore_field?(i)
92
+ field_name = @columns[i] ? @columns[i] : "column#{i+1}"
93
+ dest[field_name] = if should_transform?(field_name)
94
+ transform(field_name, values[i])
95
+ else
96
+ values[i]
97
+ end
98
+ end
99
+ end
80
100
  end
81
-
82
101
  filter_matched(event)
83
102
  rescue => e
84
103
  event.tag "_csvparsefailure"
85
- @logger.warn("Trouble parsing csv", :source => @source, :raw => raw,
86
- :exception => e)
104
+ @logger.warn("Trouble parsing csv", :field => @source, :source => source, :exception => e)
87
105
  return
88
106
  end # begin
89
107
  end # if event
@@ -92,5 +110,44 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
92
110
 
93
111
  end # def filter
94
112
 
113
+ private
114
+
115
+ def ignore_field?(index)
116
+ !@columns[index] && !@autogenerate_column_names
117
+ end
118
+
119
+ def should_transform?(field_name)
120
+ !@convert[field_name].nil?
121
+ end
122
+
123
+ def transform(field_name, value)
124
+ transformation = @convert[field_name].to_sym
125
+ converters[transformation].call(value)
126
+ end
127
+
128
+ def converters
129
+ @converters ||= {
130
+ :integer => lambda do |value|
131
+ CSV::Converters[:integer].call(value)
132
+ end,
133
+ :float => lambda do |value|
134
+ CSV::Converters[:float].call(value)
135
+
136
+ end,
137
+ :date => lambda do |value|
138
+ CSV::Converters[:date].call(value)
139
+
140
+ end,
141
+ :date_time => lambda do |value|
142
+ CSV::Converters[:date_time].call(value)
143
+ end,
144
+ :boolean => lambda do |value|
145
+ value = value.strip.downcase
146
+ return false if value == "false"
147
+ return true if value == "true"
148
+ return value
149
+ end
150
+ }
151
+ end
95
152
  end # class LogStash::Filters::Csv
96
153
 
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-csv'
4
- s.version = '2.0.2'
4
+ s.version = '2.1.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
7
7
  s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
@@ -1,175 +1,249 @@
1
1
  # encoding: utf-8
2
-
3
2
  require "logstash/devutils/rspec/spec_helper"
4
3
  require "logstash/filters/csv"
5
4
 
6
5
  describe LogStash::Filters::CSV do
7
6
 
8
- describe "all defaults" do
9
- # The logstash config goes here.
10
- # At this time, only filters are supported.
11
- config <<-CONFIG
12
- filter {
13
- csv { }
14
- }
15
- CONFIG
16
-
17
- sample "big,bird,sesame street" do
18
- insist { subject["column1"] } == "big"
19
- insist { subject["column2"] } == "bird"
20
- insist { subject["column3"] } == "sesame street"
21
- end
22
- end
7
+ subject(:plugin) { LogStash::Filters::CSV.new(config) }
8
+ let(:config) { Hash.new }
23
9
 
24
- describe "custom separator" do
25
- config <<-CONFIG
26
- filter {
27
- csv {
28
- separator => ";"
29
- }
30
- }
31
- CONFIG
32
-
33
- sample "big,bird;sesame street" do
34
- insist { subject["column1"] } == "big,bird"
35
- insist { subject["column2"] } == "sesame street"
36
- end
37
- end
10
+ let(:doc) { "" }
11
+ let(:event) { LogStash::Event.new("message" => doc) }
38
12
 
39
- describe "custom quote char" do
40
- config <<-CONFIG
41
- filter {
42
- csv {
43
- quote_char => "'"
44
- }
45
- }
46
- CONFIG
47
-
48
- sample "big,bird,'sesame street'" do
49
- insist { subject["column1"] } == "big"
50
- insist { subject["column2"] } == "bird"
51
- insist { subject["column3"] } == "sesame street"
52
- end
53
- end
13
+ describe "registration" do
14
+
15
+ context "when using invalid data types" do
16
+ let(:config) do
17
+ { "convert" => { "custom1" => "integer", "custom3" => "wrong_type" },
18
+ "columns" => ["custom1", "custom2", "custom3"] }
19
+ end
54
20
 
55
- describe "default quote char" do
56
- config <<-CONFIG
57
- filter {
58
- csv {
59
- }
60
- }
61
- CONFIG
62
-
63
- sample 'big,bird,"sesame, street"' do
64
- insist { subject["column1"] } == "big"
65
- insist { subject["column2"] } == "bird"
66
- insist { subject["column3"] } == "sesame, street"
21
+ it "should register" do
22
+ input = LogStash::Plugin.lookup("filter", "csv").new(config)
23
+ expect {input.register}.to raise_error
24
+ end
67
25
  end
68
26
  end
69
- describe "null quote char" do
70
- config <<-CONFIG
71
- filter {
72
- csv {
73
- quote_char => "\x00"
74
- }
75
- }
76
- CONFIG
77
-
78
- sample 'big,bird,"sesame" street' do
79
- insist { subject["column1"] } == 'big'
80
- insist { subject["column2"] } == 'bird'
81
- insist { subject["column3"] } == '"sesame" street'
27
+
28
+ describe "receive" do
29
+
30
+ before(:each) do
31
+ plugin.register
82
32
  end
83
- end
84
33
 
85
- describe "given columns" do
86
- # The logstash config goes here.
87
- # At this time, only filters are supported.
88
- config <<-CONFIG
89
- filter {
90
- csv {
91
- columns => ["first", "last", "address" ]
92
- }
93
- }
94
- CONFIG
95
-
96
- sample "big,bird,sesame street" do
97
- insist { subject["first"] } == "big"
98
- insist { subject["last"] } == "bird"
99
- insist { subject["address"] } == "sesame street"
34
+ describe "all defaults" do
35
+
36
+ let(:config) { Hash.new }
37
+
38
+ let(:doc) { "big,bird,sesame street" }
39
+
40
+ it "extract all the values" do
41
+ plugin.filter(event)
42
+ expect(event["column1"]).to eq("big")
43
+ expect(event["column2"]).to eq("bird")
44
+ expect(event["column3"]).to eq("sesame street")
45
+ end
46
+
47
+ it "should not mutate the source field" do
48
+ plugin.filter(event)
49
+ expect(event["message"]).to be_kind_of(String)
50
+ end
100
51
  end
101
- end
102
52
 
103
- describe "parse csv with more data than defined column names" do
104
- config <<-CONFIG
105
- filter {
106
- csv {
107
- columns => ["custom1", "custom2"]
108
- }
109
- }
110
- CONFIG
111
-
112
- sample "val1,val2,val3" do
113
- insist { subject["custom1"] } == "val1"
114
- insist { subject["custom2"] } == "val2"
115
- insist { subject["column3"] } == "val3"
53
+ describe "custom separator" do
54
+ let(:doc) { "big,bird;sesame street" }
55
+
56
+ let(:config) do
57
+ { "separator" => ";" }
58
+ end
59
+ it "extract all the values" do
60
+ plugin.filter(event)
61
+ expect(event["column1"]).to eq("big,bird")
62
+ expect(event["column2"]).to eq("sesame street")
63
+ end
116
64
  end
117
- end
118
65
 
66
+ describe "quote char" do
67
+ let(:doc) { "big,bird,'sesame street'" }
68
+
69
+ let(:config) do
70
+ { "quote_char" => "'"}
71
+ end
72
+
73
+ it "extract all the values" do
74
+ plugin.filter(event)
75
+ expect(event["column1"]).to eq("big")
76
+ expect(event["column2"]).to eq("bird")
77
+ expect(event["column3"]).to eq("sesame street")
78
+ end
79
+
80
+ context "using the default one" do
81
+ let(:doc) { 'big,bird,"sesame, street"' }
82
+ let(:config) { Hash.new }
83
+
84
+ it "extract all the values" do
85
+ plugin.filter(event)
86
+ expect(event["column1"]).to eq("big")
87
+ expect(event["column2"]).to eq("bird")
88
+ expect(event["column3"]).to eq("sesame, street")
89
+ end
90
+ end
91
+
92
+ context "using a null" do
93
+ let(:doc) { 'big,bird,"sesame" street' }
94
+ let(:config) do
95
+ { "quote_char" => "\x00" }
96
+ end
97
+
98
+ it "extract all the values" do
99
+ plugin.filter(event)
100
+ expect(event["column1"]).to eq("big")
101
+ expect(event["column2"]).to eq("bird")
102
+ expect(event["column3"]).to eq('"sesame" street')
103
+ end
104
+ end
105
+ end
119
106
 
120
- describe "parse csv from a given source with column names" do
121
- config <<-CONFIG
122
- filter {
123
- csv {
124
- source => "datafield"
125
- columns => ["custom1", "custom2", "custom3"]
126
- }
127
- }
128
- CONFIG
129
-
130
- sample("datafield" => "val1,val2,val3") do
131
- insist { subject["custom1"] } == "val1"
132
- insist { subject["custom2"] } == "val2"
133
- insist { subject["custom3"] } == "val3"
107
+ describe "given column names" do
108
+ let(:doc) { "big,bird,sesame street" }
109
+ let(:config) do
110
+ { "columns" => ["first", "last", "address" ] }
111
+ end
112
+
113
+ it "extract all the values" do
114
+ plugin.filter(event)
115
+ expect(event["first"]).to eq("big")
116
+ expect(event["last"]).to eq("bird")
117
+ expect(event["address"]).to eq("sesame street")
118
+ end
119
+
120
+ context "parse csv without autogeneration of names" do
121
+
122
+ let(:doc) { "val1,val2,val3" }
123
+ let(:config) do
124
+ { "autogenerate_column_names" => false,
125
+ "columns" => ["custom1", "custom2"] }
126
+ end
127
+
128
+ it "extract all the values" do
129
+ plugin.filter(event)
130
+ expect(event["custom1"]).to eq("val1")
131
+ expect(event["custom2"]).to eq("val2")
132
+ expect(event["column3"]).to be_falsey
133
+ end
134
+ end
135
+
136
+ context "parse csv skipping empty columns" do
137
+
138
+ let(:doc) { "val1,,val3" }
139
+
140
+ let(:config) do
141
+ { "skip_empty_columns" => true,
142
+ "source" => "datafield",
143
+ "columns" => ["custom1", "custom2", "custom3"] }
144
+ end
145
+
146
+ let(:event) { LogStash::Event.new("datafield" => doc) }
147
+
148
+ it "extract all the values" do
149
+ plugin.filter(event)
150
+ expect(event["custom1"]).to eq("val1")
151
+ expect(event["custom2"]).to be_falsey
152
+ expect(event["custom3"]).to eq("val3")
153
+ end
154
+ end
155
+
156
+ context "parse csv with more data than defined" do
157
+ let(:doc) { "val1,val2,val3" }
158
+ let(:config) do
159
+ { "columns" => ["custom1", "custom2"] }
160
+ end
161
+
162
+ it "extract all the values" do
163
+ plugin.filter(event)
164
+ expect(event["custom1"]).to eq("val1")
165
+ expect(event["custom2"]).to eq("val2")
166
+ expect(event["column3"]).to eq("val3")
167
+ end
168
+ end
169
+
170
+ context "parse csv from a given source" do
171
+ let(:doc) { "val1,val2,val3" }
172
+ let(:config) do
173
+ { "source" => "datafield",
174
+ "columns" => ["custom1", "custom2", "custom3"] }
175
+ end
176
+ let(:event) { LogStash::Event.new("datafield" => doc) }
177
+
178
+ it "extract all the values" do
179
+ plugin.filter(event)
180
+ expect(event["custom1"]).to eq("val1")
181
+ expect(event["custom2"]).to eq("val2")
182
+ expect(event["custom3"]).to eq("val3")
183
+ end
184
+ end
134
185
  end
135
- end
136
186
 
137
- describe "given target" do
138
- # The logstash config goes here.
139
- # At this time, only filters are supported.
140
- config <<-CONFIG
141
- filter {
142
- csv {
143
- target => "data"
144
- }
145
- }
146
- CONFIG
147
-
148
- sample "big,bird,sesame street" do
149
- insist { subject["data"]["column1"] } == "big"
150
- insist { subject["data"]["column2"] } == "bird"
151
- insist { subject["data"]["column3"] } == "sesame street"
187
+ describe "givin target" do
188
+ let(:config) do
189
+ { "target" => "data" }
190
+ end
191
+ let(:doc) { "big,bird,sesame street" }
192
+ let(:event) { LogStash::Event.new("message" => doc) }
193
+
194
+ it "extract all the values" do
195
+ plugin.filter(event)
196
+ expect(event["data"]["column1"]).to eq("big")
197
+ expect(event["data"]["column2"]).to eq("bird")
198
+ expect(event["data"]["column3"]).to eq("sesame street")
199
+ end
200
+
201
+ context "when having also source" do
202
+ let(:config) do
203
+ { "source" => "datain",
204
+ "target" => "data" }
205
+ end
206
+ let(:event) { LogStash::Event.new("datain" => doc) }
207
+ let(:doc) { "big,bird,sesame street" }
208
+
209
+ it "extract all the values" do
210
+ plugin.filter(event)
211
+ expect(event["data"]["column1"]).to eq("big")
212
+ expect(event["data"]["column2"]).to eq("bird")
213
+ expect(event["data"]["column3"]).to eq("sesame street")
214
+ end
215
+ end
152
216
  end
153
- end
154
217
 
155
- describe "given target and source" do
156
- # The logstash config goes here.
157
- # At this time, only filters are supported.
158
- config <<-CONFIG
159
- filter {
160
- csv {
161
- source => "datain"
162
- target => "data"
163
- }
164
- }
165
- CONFIG
166
-
167
- sample("datain" => "big,bird,sesame street") do
168
- insist { subject["data"]["column1"] } == "big"
169
- insist { subject["data"]["column2"] } == "bird"
170
- insist { subject["data"]["column3"] } == "sesame street"
218
+ describe "using field convertion" do
219
+
220
+ let(:config) do
221
+ { "convert" => { "column1" => "integer", "column3" => "boolean" } }
222
+ end
223
+ let(:doc) { "1234,bird,false" }
224
+ let(:event) { LogStash::Event.new("message" => doc) }
225
+
226
+ it "get converted values to the expected type" do
227
+ plugin.filter(event)
228
+ expect(event["column1"]).to eq(1234)
229
+ expect(event["column2"]).to eq("bird")
230
+ expect(event["column3"]).to eq(false)
231
+ end
232
+
233
+ context "when using column names" do
234
+
235
+ let(:config) do
236
+ { "convert" => { "custom1" => "integer", "custom3" => "boolean" },
237
+ "columns" => ["custom1", "custom2", "custom3"] }
238
+ end
239
+
240
+ it "get converted values to the expected type" do
241
+ plugin.filter(event)
242
+ expect(event["custom1"]).to eq(1234)
243
+ expect(event["custom2"]).to eq("bird")
244
+ expect(event["custom3"]).to eq(false)
245
+ end
246
+ end
171
247
  end
172
248
  end
173
-
174
-
175
249
  end
metadata CHANGED
@@ -1,17 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-10-14 00:00:00.000000000 Z
11
+ date: 2015-12-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- requirement: !ruby/object:Gem::Requirement
14
+ name: logstash-core
15
+ version_requirements: !ruby/object:Gem::Requirement
15
16
  requirements:
16
17
  - - '>='
17
18
  - !ruby/object:Gem::Version
@@ -19,10 +20,7 @@ dependencies:
19
20
  - - <
20
21
  - !ruby/object:Gem::Version
21
22
  version: 3.0.0
22
- name: logstash-core
23
- prerelease: false
24
- type: :runtime
25
- version_requirements: !ruby/object:Gem::Requirement
23
+ requirement: !ruby/object:Gem::Requirement
26
24
  requirements:
27
25
  - - '>='
28
26
  - !ruby/object:Gem::Version
@@ -30,20 +28,22 @@ dependencies:
30
28
  - - <
31
29
  - !ruby/object:Gem::Version
32
30
  version: 3.0.0
31
+ prerelease: false
32
+ type: :runtime
33
33
  - !ruby/object:Gem::Dependency
34
- requirement: !ruby/object:Gem::Requirement
34
+ name: logstash-devutils
35
+ version_requirements: !ruby/object:Gem::Requirement
35
36
  requirements:
36
37
  - - '>='
37
38
  - !ruby/object:Gem::Version
38
39
  version: '0'
39
- name: logstash-devutils
40
- prerelease: false
41
- type: :development
42
- version_requirements: !ruby/object:Gem::Requirement
40
+ requirement: !ruby/object:Gem::Requirement
43
41
  requirements:
44
42
  - - '>='
45
43
  - !ruby/object:Gem::Version
46
44
  version: '0'
45
+ prerelease: false
46
+ type: :development
47
47
  description: This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program
48
48
  email: info@elastic.co
49
49
  executables: []