logstash-filter-csv 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2e2d4379b16ea4358cfffca7c0922024af7f2e80
4
- data.tar.gz: fc2a74ee00ef3598a94765f142d0d21bea669f45
3
+ metadata.gz: b64bc3605560eab32e1532795bcfa5afc2471da5
4
+ data.tar.gz: 0a17fb598a3ac9932e2e7485309eb3c34f22efd2
5
5
  SHA512:
6
- metadata.gz: 56696d58d95c9703886e6ac1c8fd4e70851550d4cd8f3f38effacea4bd7f4360aee0544a05ae8348dc8ca59bd97ac8835ee9d18c34ab4e13c59ce833b5a5f496
7
- data.tar.gz: b02418749ef7a9b5bd7dc9d1b5a4bbd10b39e266344c7ceb1983801757474e07fc70887a15b42a1a7699e455fcf0ecba473c72f60960565c81692153faa2b6c0
6
+ metadata.gz: c5d0fed6269330d4f0e5de50023f6cba7371f3aecaf4d4996f82a0f5f645a4c4381b627086a2eaef484547ab450b66569bb0e15a4fe71c59f8d5a299d00d24f9
7
+ data.tar.gz: 89563ed47c27a9a50e5047bd8cc09b8cdbb613154c9a96fabc14cc4f6fa2acf372e56a106adabda8f999aaaa710b9148e261e6f4843257eefc03f29708610be5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 3.0.3
2
+ - generate Timestamp objects for correctly converted :date and :date_time fields with related specs.
3
+
1
4
  ## 3.0.2
2
5
  - Relax constraint on logstash-core-plugin-api to >= 1.60 <= 2.99
3
6
 
@@ -0,0 +1,152 @@
1
+ :plugin: csv
2
+ :type: filter
3
+
4
+ ///////////////////////////////////////////
5
+ START - GENERATED VARIABLES, DO NOT EDIT!
6
+ ///////////////////////////////////////////
7
+ :version: %VERSION%
8
+ :release_date: %RELEASE_DATE%
9
+ :changelog_url: %CHANGELOG_URL%
10
+ :include_path: ../../../logstash/docs/include
11
+ ///////////////////////////////////////////
12
+ END - GENERATED VARIABLES, DO NOT EDIT!
13
+ ///////////////////////////////////////////
14
+
15
+ [id="plugins-{type}-{plugin}"]
16
+
17
+ === Csv
18
+
19
+ include::{include_path}/plugin_header.asciidoc[]
20
+
21
+ ==== Description
22
+
23
+ The CSV filter takes an event field containing CSV data, parses it,
24
+ and stores it as individual fields (can optionally specify the names).
25
+ This filter can also parse data with any separator, not just commas.
26
+
27
+ [id="plugins-{type}s-{plugin}-options"]
28
+ ==== Csv Filter Configuration Options
29
+
30
+ This plugin supports the following configuration options plus the <<plugins-{type}s-common-options>> described later.
31
+
32
+ [cols="<,<,<",options="header",]
33
+ |=======================================================================
34
+ |Setting |Input type|Required
35
+ | <<plugins-{type}s-{plugin}-autodetect_column_names>> |<<boolean,boolean>>|No
36
+ | <<plugins-{type}s-{plugin}-autogenerate_column_names>> |<<boolean,boolean>>|No
37
+ | <<plugins-{type}s-{plugin}-columns>> |<<array,array>>|No
38
+ | <<plugins-{type}s-{plugin}-convert>> |<<hash,hash>>|No
39
+ | <<plugins-{type}s-{plugin}-quote_char>> |<<string,string>>|No
40
+ | <<plugins-{type}s-{plugin}-separator>> |<<string,string>>|No
41
+ | <<plugins-{type}s-{plugin}-skip_empty_columns>> |<<boolean,boolean>>|No
42
+ | <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
43
+ | <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
44
+ |=======================================================================
45
+
46
+ Also see <<plugins-{type}s-common-options>> for a list of options supported by all
47
+ filter plugins.
48
+
49
+ &nbsp;
50
+
51
+ [id="plugins-{type}s-{plugin}-autodetect_column_names"]
52
+ ===== `autodetect_column_names`
53
+
54
+ * Value type is <<boolean,boolean>>
55
+ * Default value is `false`
56
+
57
+ Define whether column names should be auto-detected from the header column or not.
58
+ Defaults to false.
59
+
60
+ [id="plugins-{type}s-{plugin}-autogenerate_column_names"]
61
+ ===== `autogenerate_column_names`
62
+
63
+ * Value type is <<boolean,boolean>>
64
+ * Default value is `true`
65
+
66
+ Define whether column names should be autogenerated or not.
67
+ Defaults to true. If set to false, columns not having a header specified will not be parsed.
68
+
69
+ [id="plugins-{type}s-{plugin}-columns"]
70
+ ===== `columns`
71
+
72
+ * Value type is <<array,array>>
73
+ * Default value is `[]`
74
+
75
+ Define a list of column names (in the order they appear in the CSV,
76
+ as if it were a header line). If `columns` is not configured, or there
77
+ are not enough columns specified, the default column names are
78
+ "column1", "column2", etc. In the case that there are more columns
79
+ in the data than specified in this column list, extra columns will be auto-numbered:
80
+ (e.g. "user_defined_1", "user_defined_2", "column3", "column4", etc.)
81
+
82
+ [id="plugins-{type}s-{plugin}-convert"]
83
+ ===== `convert`
84
+
85
+ * Value type is <<hash,hash>>
86
+ * Default value is `{}`
87
+
88
+ Define a set of datatype conversions to be applied to columns.
89
+ Possible conversions are integer, float, date, date_time, boolean
90
+
91
+ # Example:
92
+ [source,ruby]
93
+ filter {
94
+ csv {
95
+ convert => {
96
+ "column1" => "integer"
97
+ "column2" => "boolean"
98
+ }
99
+ }
100
+ }
101
+
102
+ [id="plugins-{type}s-{plugin}-quote_char"]
103
+ ===== `quote_char`
104
+
105
+ * Value type is <<string,string>>
106
+ * Default value is `"\""`
107
+
108
+ Define the character used to quote CSV fields. If this is not specified
109
+ the default is a double quote `"`.
110
+ Optional.
111
+
112
+ [id="plugins-{type}s-{plugin}-separator"]
113
+ ===== `separator`
114
+
115
+ * Value type is <<string,string>>
116
+ * Default value is `","`
117
+
118
+ Define the column separator value. If this is not specified, the default
119
+ is a comma `,`. If you want to define a tabulation as a separator, you need
120
+ to set the value to the actual tab character and not `\t`.
121
+ Optional.
122
+
123
+ [id="plugins-{type}s-{plugin}-skip_empty_columns"]
124
+ ===== `skip_empty_columns`
125
+
126
+ * Value type is <<boolean,boolean>>
127
+ * Default value is `false`
128
+
129
+ Define whether empty columns should be skipped.
130
+ Defaults to false. If set to true, columns containing no value will not get set.
131
+
132
+ [id="plugins-{type}s-{plugin}-source"]
133
+ ===== `source`
134
+
135
+ * Value type is <<string,string>>
136
+ * Default value is `"message"`
137
+
138
+ The CSV data in the value of the `source` field will be expanded into a
139
+ data structure.
140
+
141
+ [id="plugins-{type}s-{plugin}-target"]
142
+ ===== `target`
143
+
144
+ * Value type is <<string,string>>
145
+ * There is no default value for this setting.
146
+
147
+ Define target field for placing the data.
148
+ Defaults to writing to the root of the event.
149
+
150
+
151
+
152
+ include::{include_path}/{type}.asciidoc[]
@@ -23,7 +23,8 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
23
23
  config :columns, :validate => :array, :default => []
24
24
 
25
25
  # Define the column separator value. If this is not specified, the default
26
- # is a comma `,`.
26
+ # is a comma `,`. If you want to define a tabulation as a separator, you need
27
+ # to set the value to the actual tab character and not `\t`.
27
28
  # Optional.
28
29
  config :separator, :validate => :string, :default => ","
29
30
 
@@ -51,11 +52,18 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
51
52
  # [source,ruby]
52
53
  # filter {
53
54
  # csv {
54
- # convert => { "column1" => "integer", "column2" => "boolean" }
55
+ # convert => {
56
+ # "column1" => "integer"
57
+ # "column2" => "boolean"
58
+ # }
55
59
  # }
56
60
  # }
57
61
  config :convert, :validate => :hash, :default => {}
58
62
 
63
+ # Define whether column names should be auto-detected from the header column or not.
64
+ # Defaults to false.
65
+ config :autodetect_column_names, :validate => :boolean, :default => false
66
+
59
67
  CONVERTERS = {
60
68
  :integer => lambda do |value|
61
69
  CSV::Converters[:integer].call(value)
@@ -66,11 +74,13 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
66
74
  end,
67
75
 
68
76
  :date => lambda do |value|
69
- CSV::Converters[:date].call(value)
77
+ result = CSV::Converters[:date].call(value)
78
+ result.is_a?(Date) ? LogStash::Timestamp.new(result.to_time) : result
70
79
  end,
71
80
 
72
81
  :date_time => lambda do |value|
73
- CSV::Converters[:date_time].call(value)
82
+ result = CSV::Converters[:date_time].call(value)
83
+ result.is_a?(DateTime) ? LogStash::Timestamp.new(result.to_time) : result
74
84
  end,
75
85
 
76
86
  :boolean => lambda do |value|
@@ -112,6 +122,12 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
112
122
  begin
113
123
  values = CSV.parse_line(source, :col_sep => @separator, :quote_char => @quote_char)
114
124
 
125
+ if (@autodetect_column_names && @columns.empty?)
126
+ @columns = values
127
+ event.cancel
128
+ return
129
+ end
130
+
115
131
  values.each_index do |i|
116
132
  unless (@skip_empty_columns && (values[i].nil? || values[i].empty?))
117
133
  unless ignore_field?(i)
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-filter-csv'
4
- s.version = '3.0.2'
4
+ s.version = '3.0.3'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
11
11
  s.require_paths = ["lib"]
12
12
 
13
13
  # Files
14
- s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','CONTRIBUTORS','Gemfile','LICENSE','NOTICE.TXT']
14
+ s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
15
15
 
16
16
  # Tests
17
17
  s.test_files = s.files.grep(%r{^(test|spec|features)/})
@@ -20,7 +20,7 @@ describe LogStash::Filters::CSV do
20
20
 
21
21
  it "should register" do
22
22
  input = LogStash::Plugin.lookup("filter", "csv").new(config)
23
- expect {input.register}.to raise_error
23
+ expect {input.register}.to raise_error(LogStash::ConfigurationError)
24
24
  end
25
25
  end
26
26
  end
@@ -232,18 +232,64 @@ describe LogStash::Filters::CSV do
232
232
  describe "using field convertion" do
233
233
 
234
234
  let(:config) do
235
- { "convert" => { "column1" => "integer", "column3" => "boolean" } }
235
+ {
236
+ "convert" => {
237
+ "column1" => "integer",
238
+ "column3" => "boolean",
239
+ "column4" => "float",
240
+ "column5" => "date",
241
+ "column6" => "date_time",
242
+ "column7" => "date",
243
+ "column8" => "date_time",
244
+ }
245
+ }
236
246
  end
237
- let(:doc) { "1234,bird,false" }
247
+ # 2017-06-01,2001-02-03T04:05:06+07:00
248
+ let(:doc) { "1234,bird,false,3.14159265359,2017-06-01,2001-02-03 04:05:06,invalid_date,invalid_date_time" }
238
249
  let(:event) { LogStash::Event.new("message" => doc) }
239
250
 
240
- it "get converted values to the expected type" do
251
+ it "converts to integer" do
241
252
  plugin.filter(event)
242
253
  expect(event.get("column1")).to eq(1234)
254
+ end
255
+
256
+ it "does not convert without converter" do
257
+ plugin.filter(event)
243
258
  expect(event.get("column2")).to eq("bird")
259
+ end
260
+
261
+ it "converts to boolean" do
262
+ plugin.filter(event)
244
263
  expect(event.get("column3")).to eq(false)
245
264
  end
246
265
 
266
+ it "converts to float" do
267
+ plugin.filter(event)
268
+ expect(event.get("column4")).to eq(3.14159265359)
269
+ end
270
+
271
+ it "converts to date" do
272
+ plugin.filter(event)
273
+ expect(event.get("column5")).to be_a(LogStash::Timestamp)
274
+ expect(event.get("column5").to_s).to eq(LogStash::Timestamp.new(Date.parse("2017-06-01").to_time).to_s)
275
+ end
276
+
277
+ it "converts to date_time" do
278
+ plugin.filter(event)
279
+ expect(event.get("column6")).to be_a(LogStash::Timestamp)
280
+ expect(event.get("column6").to_s).to eq(LogStash::Timestamp.new(DateTime.parse("2001-02-03 04:05:06").to_time).to_s)
281
+ end
282
+
283
+ it "tries to converts to date but return original" do
284
+ plugin.filter(event)
285
+ expect(event.get("column7")).to eq("invalid_date")
286
+ end
287
+
288
+ it "tries to converts to date_time but return original" do
289
+ plugin.filter(event)
290
+ expect(event.get("column8")).to eq("invalid_date_time")
291
+ end
292
+
247
293
  context "when using column names" do
248
294
 
249
295
  let(:config) do
@@ -259,5 +305,21 @@ describe LogStash::Filters::CSV do
259
305
  end
260
306
  end
261
307
  end
308
+
309
+ describe "given autodetect option" do
310
+ let(:header) { LogStash::Event.new("message" => "first,last,address") }
311
+ let(:doc) { "big,bird,sesame street" }
312
+ let(:config) do
313
+ { "autodetect_column_names" => true }
314
+ end
315
+
316
+ it "extract all the values with the autodetected header" do
317
+ plugin.filter(header)
318
+ plugin.filter(event)
319
+ expect(event.get("first")).to eq("big")
320
+ expect(event.get("last")).to eq("bird")
321
+ expect(event.get("address")).to eq("sesame street")
322
+ end
323
+ end
262
324
  end
263
325
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-filter-csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Elastic
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-14 00:00:00.000000000 Z
11
+ date: 2017-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -56,6 +56,7 @@ files:
56
56
  - LICENSE
57
57
  - NOTICE.TXT
58
58
  - README.md
59
+ - docs/index.asciidoc
59
60
  - lib/logstash/filters/csv.rb
60
61
  - logstash-filter-csv.gemspec
61
62
  - spec/filters/csv_spec.rb
@@ -81,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
81
82
  version: '0'
82
83
  requirements: []
83
84
  rubyforge_project:
84
- rubygems_version: 2.6.3
85
+ rubygems_version: 2.4.8
85
86
  signing_key:
86
87
  specification_version: 4
87
88
  summary: The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names).