logstash-filter-csv 3.0.2 → 3.0.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +152 -0
- data/lib/logstash/filters/csv.rb +20 -4
- data/logstash-filter-csv.gemspec +2 -2
- data/spec/filters/csv_spec.rb +66 -4
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b64bc3605560eab32e1532795bcfa5afc2471da5
+  data.tar.gz: 0a17fb598a3ac9932e2e7485309eb3c34f22efd2
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c5d0fed6269330d4f0e5de50023f6cba7371f3aecaf4d4996f82a0f5f645a4c4381b627086a2eaef484547ab450b66569bb0e15a4fe71c59f8d5a299d00d24f9
+  data.tar.gz: 89563ed47c27a9a50e5047bd8cc09b8cdbb613154c9a96fabc14cc4f6fa2acf372e56a106adabda8f999aaaa710b9148e261e6f4843257eefc03f29708610be5
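The entries above are digests of the metadata.gz and data.tar.gz members packed inside the .gem archive. As a rough sketch of how one of them could be checked locally (assuming the gem has already been unpacked with `tar -xf logstash-filter-csv-3.0.3.gem`, so the file name and path here are illustrative):

    require "digest"

    # SHA512 of data.tar.gz as recorded in checksums.yaml above
    expected = "89563ed47c27a9a50e5047bd8cc09b8cdbb613154c9a96fabc14cc4f6fa2acf372e56a106adabda8f999aaaa710b9148e261e6f4843257eefc03f29708610be5"
    actual   = Digest::SHA512.file("data.tar.gz").hexdigest

    puts(actual == expected ? "data.tar.gz checksum OK" : "checksum MISMATCH")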
data/CHANGELOG.md
CHANGED
data/docs/index.asciidoc
ADDED
@@ -0,0 +1,152 @@
+:plugin: csv
+:type: filter
+
+///////////////////////////////////////////
+START - GENERATED VARIABLES, DO NOT EDIT!
+///////////////////////////////////////////
+:version: %VERSION%
+:release_date: %RELEASE_DATE%
+:changelog_url: %CHANGELOG_URL%
+:include_path: ../../../logstash/docs/include
+///////////////////////////////////////////
+END - GENERATED VARIABLES, DO NOT EDIT!
+///////////////////////////////////////////
+
+[id="plugins-{type}-{plugin}"]
+
+=== Csv
+
+include::{include_path}/plugin_header.asciidoc[]
+
+==== Description
+
+The CSV filter takes an event field containing CSV data, parses it,
+and stores it as individual fields (can optionally specify the names).
+This filter can also parse data with any separator, not just commas.
+
+[id="plugins-{type}s-{plugin}-options"]
+==== Csv Filter Configuration Options
+
+This plugin supports the following configuration options plus the <<plugins-{type}s-common-options>> described later.
+
+[cols="<,<,<",options="header",]
+|=======================================================================
+|Setting |Input type|Required
+| <<plugins-{type}s-{plugin}-autodetect_column_names>> |<<boolean,boolean>>|No
+| <<plugins-{type}s-{plugin}-autogenerate_column_names>> |<<boolean,boolean>>|No
+| <<plugins-{type}s-{plugin}-columns>> |<<array,array>>|No
+| <<plugins-{type}s-{plugin}-convert>> |<<hash,hash>>|No
+| <<plugins-{type}s-{plugin}-quote_char>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-separator>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-skip_empty_columns>> |<<boolean,boolean>>|No
+| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
+| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
+|=======================================================================
+
+Also see <<plugins-{type}s-common-options>> for a list of options supported by all
+filter plugins.
+
+
+
+[id="plugins-{type}s-{plugin}-autodetect_column_names"]
+===== `autodetect_column_names`
+
+* Value type is <<boolean,boolean>>
+* Default value is `false`
+
+Define whether column names should be auto-detected from the header column or not.
+Defaults to false.
+
+[id="plugins-{type}s-{plugin}-autogenerate_column_names"]
+===== `autogenerate_column_names`
+
+* Value type is <<boolean,boolean>>
+* Default value is `true`
+
+Define whether column names should autogenerated or not.
+Defaults to true. If set to false, columns not having a header specified will not be parsed.
+
+[id="plugins-{type}s-{plugin}-columns"]
+===== `columns`
+
+* Value type is <<array,array>>
+* Default value is `[]`
+
+Define a list of column names (in the order they appear in the CSV,
+as if it were a header line). If `columns` is not configured, or there
+are not enough columns specified, the default column names are
+"column1", "column2", etc. In the case that there are more columns
+in the data than specified in this column list, extra columns will be auto-numbered:
+(e.g. "user_defined_1", "user_defined_2", "column3", "column4", etc.)
+
+[id="plugins-{type}s-{plugin}-convert"]
+===== `convert`
+
+* Value type is <<hash,hash>>
+* Default value is `{}`
+
+Define a set of datatype conversions to be applied to columns.
+Possible conversions are integer, float, date, date_time, boolean
+
+# Example:
+[source,ruby]
+    filter {
+      csv {
+        convert => {
+          "column1" => "integer"
+          "column2" => "boolean"
+        }
+      }
+    }
+
+[id="plugins-{type}s-{plugin}-quote_char"]
+===== `quote_char`
+
+* Value type is <<string,string>>
+* Default value is `"\""`
+
+Define the character used to quote CSV fields. If this is not specified
+the default is a double quote `"`.
+Optional.
+
+[id="plugins-{type}s-{plugin}-separator"]
+===== `separator`
+
+* Value type is <<string,string>>
+* Default value is `","`
+
+Define the column separator value. If this is not specified, the default
+is a comma `,`. If you want to define a tabulation as a separator, you need
+to set the value to the actual tab character and not `\t`.
+Optional.
+
+[id="plugins-{type}s-{plugin}-skip_empty_columns"]
+===== `skip_empty_columns`
+
+* Value type is <<boolean,boolean>>
+* Default value is `false`
+
+Define whether empty columns should be skipped.
+Defaults to false. If set to true, columns containing no value will not get set.
+
+[id="plugins-{type}s-{plugin}-source"]
+===== `source`
+
+* Value type is <<string,string>>
+* Default value is `"message"`
+
+The CSV data in the value of the `source` field will be expanded into a
+data structure.
+
+[id="plugins-{type}s-{plugin}-target"]
+===== `target`
+
+* Value type is <<string,string>>
+* There is no default value for this setting.
+
+Define target field for placing the data.
+Defaults to writing to the root of the event.
+
+
+
+include::{include_path}/{type}.asciidoc[]
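The options documented above all compose inside a single `csv { }` block. A hedged example of how they might be combined in a pipeline configuration (the field names, the quote character, and the `csv_parsed` target are illustrative choices, not taken from the plugin docs):

    filter {
      csv {
        source             => "message"
        separator          => ","
        quote_char         => "'"
        columns            => ["timestamp", "user", "bytes"]
        skip_empty_columns => true
        target             => "csv_parsed"
        convert            => { "bytes" => "integer" }
      }
    }

With a configuration like this, the parsed columns land under the `csv_parsed` field instead of the event root, and `bytes` is emitted as an integer rather than a string.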
data/lib/logstash/filters/csv.rb
CHANGED
@@ -23,7 +23,8 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
   config :columns, :validate => :array, :default => []
 
   # Define the column separator value. If this is not specified, the default
-  # is a comma `,`.
+  # is a comma `,`. If you want to define a tabulation as a separator, you need
+  # to set the value to the actual tab character and not `\t`.
   # Optional.
   config :separator, :validate => :string, :default => ","
 
@@ -51,11 +52,18 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
   # [source,ruby]
   #     filter {
   #       csv {
-  #         convert => {
+  #         convert => {
+  #           "column1" => "integer"
+  #           "column2" => "boolean"
+  #         }
   #       }
   #     }
   config :convert, :validate => :hash, :default => {}
 
+  # Define whether column names should be auto-detected from the header column or not.
+  # Defaults to false.
+  config :autodetect_column_names, :validate => :boolean, :default => false
+
   CONVERTERS = {
     :integer => lambda do |value|
       CSV::Converters[:integer].call(value)
@@ -66,11 +74,13 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
     end,
 
     :date => lambda do |value|
-      CSV::Converters[:date].call(value)
+      result = CSV::Converters[:date].call(value)
+      result.is_a?(Date) ? LogStash::Timestamp.new(result.to_time) : result
    end,
 
     :date_time => lambda do |value|
-      CSV::Converters[:date_time].call(value)
+      result = CSV::Converters[:date_time].call(value)
+      result.is_a?(DateTime) ? LogStash::Timestamp.new(result.to_time) : result
     end,
 
     :boolean => lambda do |value|
@@ -112,6 +122,12 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
     begin
       values = CSV.parse_line(source, :col_sep => @separator, :quote_char => @quote_char)
 
+      if (@autodetect_column_names && @columns.empty?)
+        @columns = values
+        event.cancel
+        return
+      end
+
       values.each_index do |i|
         unless (@skip_empty_columns && (values[i].nil? || values[i].empty?))
           unless ignore_field?(i)
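Both converter changes above follow the same pattern: run Ruby's stock `CSV::Converters` lambda, and only wrap the result in a `LogStash::Timestamp` when parsing actually produced a `Date`/`DateTime`; unparseable input falls through unchanged. A minimal sketch of that pattern in plain Ruby, using `Time` in place of `LogStash::Timestamp` so it runs outside Logstash:

    require "csv"
    require "date"

    # Same shape as the :date converter above, with Time standing in for
    # LogStash::Timestamp so the sketch runs in a bare Ruby interpreter.
    def convert_date(value)
      result = CSV::Converters[:date].call(value)
      result.is_a?(Date) ? result.to_time : result
    end

    p convert_date("2017-06-01")    # parsed: prints a Time for 2017-06-01
    p convert_date("invalid_date")  # not parseable: prints "invalid_date" unchanged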
data/logstash-filter-csv.gemspec
CHANGED
@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-filter-csv'
-  s.version = '3.0.
+  s.version = '3.0.3'
   s.licenses = ['Apache License (2.0)']
   s.summary = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
   s.require_paths = ["lib"]
 
   # Files
-  s.files = Dir[
+  s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
 
   # Tests
   s.test_files = s.files.grep(%r{^(test|spec|features)/})
data/spec/filters/csv_spec.rb
CHANGED
@@ -20,7 +20,7 @@ describe LogStash::Filters::CSV do
 
     it "should register" do
       input = LogStash::Plugin.lookup("filter", "csv").new(config)
-      expect {input.register}.to raise_error
+      expect {input.register}.to raise_error(LogStash::ConfigurationError)
     end
   end
 end
@@ -232,18 +232,64 @@ describe LogStash::Filters::CSV do
     describe "using field convertion" do
 
       let(:config) do
-        {
+        {
+          "convert" => {
+            "column1" => "integer",
+            "column3" => "boolean",
+            "column4" => "float",
+            "column5" => "date",
+            "column6" => "date_time",
+            "column7" => "date",
+            "column8" => "date_time",
+          }
+        }
       end
-
+      # 2017-06-01,2001-02-03T04:05:06+07:00
+      let(:doc) { "1234,bird,false,3.14159265359,2017-06-01,2001-02-03 04:05:06,invalid_date,invalid_date_time" }
       let(:event) { LogStash::Event.new("message" => doc) }
 
-      it "
+      it "converts to integer" do
        plugin.filter(event)
        expect(event.get("column1")).to eq(1234)
+      end
+
+      it "does not convert without converter" do
+        plugin.filter(event)
        expect(event.get("column2")).to eq("bird")
+      end
+
+      it "converts to boolean" do
+        plugin.filter(event)
        expect(event.get("column3")).to eq(false)
      end
 
+      it "converts to float" do
+        plugin.filter(event)
+        expect(event.get("column4")).to eq(3.14159265359)
+      end
+
+      it "converts to date" do
+        plugin.filter(event)
+        expect(event.get("column5")).to be_a(LogStash::Timestamp)
+        expect(event.get("column5").to_s).to eq(LogStash::Timestamp.new(Date.parse("2017-06-01").to_time).to_s)
+      end
+
+      it "converts to date_time" do
+        plugin.filter(event)
+        expect(event.get("column6")).to be_a(LogStash::Timestamp)
+        expect(event.get("column6").to_s).to eq(LogStash::Timestamp.new(DateTime.parse("2001-02-03 04:05:06").to_time).to_s)
+      end
+
+      it "tries to converts to date but return original" do
+        plugin.filter(event)
+        expect(event.get("column7")).to eq("invalid_date")
+      end
+
+      it "tries to converts to date_time but return original" do
+        plugin.filter(event)
+        expect(event.get("column8")).to eq("invalid_date_time")
+      end
+
       context "when using column names" do
 
         let(:config) do
@@ -259,5 +305,21 @@ describe LogStash::Filters::CSV do
         end
       end
     end
+
+    describe "given autodetect option" do
+      let(:header) { LogStash::Event.new("message" => "first,last,address") }
+      let(:doc) { "big,bird,sesame street" }
+      let(:config) do
+        { "autodetect_column_names" => true }
+      end
+
+      it "extract all the values with the autodetected header" do
+        plugin.filter(header)
+        plugin.filter(event)
+        expect(event.get("first")).to eq("big")
+        expect(event.get("last")).to eq("bird")
+        expect(event.get("address")).to eq("sesame street")
+      end
+    end
   end
 end
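The new specs above exercise the plugin through the same public API any caller would use: look the filter up, `register` it, pass events through `filter`, and read fields back with `Event#get`. A minimal smoke-test sketch along those lines, assuming it is run inside a Logstash/JRuby environment where these classes are loadable:

    # Mirrors the autodetect spec above; assumes a Logstash environment
    # (e.g. a logstash-core console) where these requires resolve.
    require "logstash/plugin"
    require "logstash/event"

    filter = LogStash::Plugin.lookup("filter", "csv").new("autodetect_column_names" => true)
    filter.register

    header = LogStash::Event.new("message" => "first,last,address")
    row    = LogStash::Event.new("message" => "big,bird,sesame street")

    filter.filter(header)   # first event is consumed as the header row
    filter.filter(row)

    puts row.get("first")   # => "big"
    puts row.get("last")    # => "bird"
    puts row.get("address") # => "sesame street"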
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: logstash-filter-csv
 version: !ruby/object:Gem::Version
-  version: 3.0.
+  version: 3.0.3
 platform: ruby
 authors:
 - Elastic
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2017-05-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
@@ -56,6 +56,7 @@ files:
 - LICENSE
 - NOTICE.TXT
 - README.md
+- docs/index.asciidoc
 - lib/logstash/filters/csv.rb
 - logstash-filter-csv.gemspec
 - spec/filters/csv_spec.rb
@@ -81,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.4.8
 signing_key:
 specification_version: 4
 summary: The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names).