logstash-filter-csv 3.0.2 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/docs/index.asciidoc +152 -0
- data/lib/logstash/filters/csv.rb +20 -4
- data/logstash-filter-csv.gemspec +2 -2
- data/spec/filters/csv_spec.rb +66 -4
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b64bc3605560eab32e1532795bcfa5afc2471da5
|
4
|
+
data.tar.gz: 0a17fb598a3ac9932e2e7485309eb3c34f22efd2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c5d0fed6269330d4f0e5de50023f6cba7371f3aecaf4d4996f82a0f5f645a4c4381b627086a2eaef484547ab450b66569bb0e15a4fe71c59f8d5a299d00d24f9
|
7
|
+
data.tar.gz: 89563ed47c27a9a50e5047bd8cc09b8cdbb613154c9a96fabc14cc4f6fa2acf372e56a106adabda8f999aaaa710b9148e261e6f4843257eefc03f29708610be5
|
data/CHANGELOG.md
CHANGED
data/docs/index.asciidoc
ADDED
@@ -0,0 +1,152 @@
|
|
1
|
+
:plugin: csv
|
2
|
+
:type: filter
|
3
|
+
|
4
|
+
///////////////////////////////////////////
|
5
|
+
START - GENERATED VARIABLES, DO NOT EDIT!
|
6
|
+
///////////////////////////////////////////
|
7
|
+
:version: %VERSION%
|
8
|
+
:release_date: %RELEASE_DATE%
|
9
|
+
:changelog_url: %CHANGELOG_URL%
|
10
|
+
:include_path: ../../../logstash/docs/include
|
11
|
+
///////////////////////////////////////////
|
12
|
+
END - GENERATED VARIABLES, DO NOT EDIT!
|
13
|
+
///////////////////////////////////////////
|
14
|
+
|
15
|
+
[id="plugins-{type}-{plugin}"]
|
16
|
+
|
17
|
+
=== Csv
|
18
|
+
|
19
|
+
include::{include_path}/plugin_header.asciidoc[]
|
20
|
+
|
21
|
+
==== Description
|
22
|
+
|
23
|
+
The CSV filter takes an event field containing CSV data, parses it,
|
24
|
+
and stores it as individual fields (can optionally specify the names).
|
25
|
+
This filter can also parse data with any separator, not just commas.
|
26
|
+
|
27
|
+
[id="plugins-{type}s-{plugin}-options"]
|
28
|
+
==== Csv Filter Configuration Options
|
29
|
+
|
30
|
+
This plugin supports the following configuration options plus the <<plugins-{type}s-common-options>> described later.
|
31
|
+
|
32
|
+
[cols="<,<,<",options="header",]
|
33
|
+
|=======================================================================
|
34
|
+
|Setting |Input type|Required
|
35
|
+
| <<plugins-{type}s-{plugin}-autodetect_column_names>> |<<boolean,boolean>>|No
|
36
|
+
| <<plugins-{type}s-{plugin}-autogenerate_column_names>> |<<boolean,boolean>>|No
|
37
|
+
| <<plugins-{type}s-{plugin}-columns>> |<<array,array>>|No
|
38
|
+
| <<plugins-{type}s-{plugin}-convert>> |<<hash,hash>>|No
|
39
|
+
| <<plugins-{type}s-{plugin}-quote_char>> |<<string,string>>|No
|
40
|
+
| <<plugins-{type}s-{plugin}-separator>> |<<string,string>>|No
|
41
|
+
| <<plugins-{type}s-{plugin}-skip_empty_columns>> |<<boolean,boolean>>|No
|
42
|
+
| <<plugins-{type}s-{plugin}-source>> |<<string,string>>|No
|
43
|
+
| <<plugins-{type}s-{plugin}-target>> |<<string,string>>|No
|
44
|
+
|=======================================================================
|
45
|
+
|
46
|
+
Also see <<plugins-{type}s-common-options>> for a list of options supported by all
|
47
|
+
filter plugins.
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
[id="plugins-{type}s-{plugin}-autodetect_column_names"]
|
52
|
+
===== `autodetect_column_names`
|
53
|
+
|
54
|
+
* Value type is <<boolean,boolean>>
|
55
|
+
* Default value is `false`
|
56
|
+
|
57
|
+
Define whether column names should be auto-detected from the header column or not.
|
58
|
+
Defaults to false.
|
59
|
+
|
60
|
+
[id="plugins-{type}s-{plugin}-autogenerate_column_names"]
|
61
|
+
===== `autogenerate_column_names`
|
62
|
+
|
63
|
+
* Value type is <<boolean,boolean>>
|
64
|
+
* Default value is `true`
|
65
|
+
|
66
|
+
Define whether column names should be autogenerated or not.
|
67
|
+
Defaults to true. If set to false, columns not having a header specified will not be parsed.
|
68
|
+
|
69
|
+
[id="plugins-{type}s-{plugin}-columns"]
|
70
|
+
===== `columns`
|
71
|
+
|
72
|
+
* Value type is <<array,array>>
|
73
|
+
* Default value is `[]`
|
74
|
+
|
75
|
+
Define a list of column names (in the order they appear in the CSV,
|
76
|
+
as if it were a header line). If `columns` is not configured, or there
|
77
|
+
are not enough columns specified, the default column names are
|
78
|
+
"column1", "column2", etc. In the case that there are more columns
|
79
|
+
in the data than specified in this column list, extra columns will be auto-numbered:
|
80
|
+
(e.g. "user_defined_1", "user_defined_2", "column3", "column4", etc.)
|
81
|
+
|
82
|
+
[id="plugins-{type}s-{plugin}-convert"]
|
83
|
+
===== `convert`
|
84
|
+
|
85
|
+
* Value type is <<hash,hash>>
|
86
|
+
* Default value is `{}`
|
87
|
+
|
88
|
+
Define a set of datatype conversions to be applied to columns.
|
89
|
+
Possible conversions are integer, float, date, date_time, boolean
|
90
|
+
|
91
|
+
# Example:
|
92
|
+
[source,ruby]
|
93
|
+
filter {
|
94
|
+
csv {
|
95
|
+
convert => {
|
96
|
+
"column1" => "integer"
|
97
|
+
"column2" => "boolean"
|
98
|
+
}
|
99
|
+
}
|
100
|
+
}
|
101
|
+
|
102
|
+
[id="plugins-{type}s-{plugin}-quote_char"]
|
103
|
+
===== `quote_char`
|
104
|
+
|
105
|
+
* Value type is <<string,string>>
|
106
|
+
* Default value is `"\""`
|
107
|
+
|
108
|
+
Define the character used to quote CSV fields. If this is not specified
|
109
|
+
the default is a double quote `"`.
|
110
|
+
Optional.
|
111
|
+
|
112
|
+
[id="plugins-{type}s-{plugin}-separator"]
|
113
|
+
===== `separator`
|
114
|
+
|
115
|
+
* Value type is <<string,string>>
|
116
|
+
* Default value is `","`
|
117
|
+
|
118
|
+
Define the column separator value. If this is not specified, the default
|
119
|
+
is a comma `,`. If you want to define a tabulation as a separator, you need
|
120
|
+
to set the value to the actual tab character and not `\t`.
|
121
|
+
Optional.
|
122
|
+
|
123
|
+
[id="plugins-{type}s-{plugin}-skip_empty_columns"]
|
124
|
+
===== `skip_empty_columns`
|
125
|
+
|
126
|
+
* Value type is <<boolean,boolean>>
|
127
|
+
* Default value is `false`
|
128
|
+
|
129
|
+
Define whether empty columns should be skipped.
|
130
|
+
Defaults to false. If set to true, columns containing no value will not get set.
|
131
|
+
|
132
|
+
[id="plugins-{type}s-{plugin}-source"]
|
133
|
+
===== `source`
|
134
|
+
|
135
|
+
* Value type is <<string,string>>
|
136
|
+
* Default value is `"message"`
|
137
|
+
|
138
|
+
The CSV data in the value of the `source` field will be expanded into a
|
139
|
+
data structure.
|
140
|
+
|
141
|
+
[id="plugins-{type}s-{plugin}-target"]
|
142
|
+
===== `target`
|
143
|
+
|
144
|
+
* Value type is <<string,string>>
|
145
|
+
* There is no default value for this setting.
|
146
|
+
|
147
|
+
Define target field for placing the data.
|
148
|
+
Defaults to writing to the root of the event.
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
include::{include_path}/{type}.asciidoc[]
|
data/lib/logstash/filters/csv.rb
CHANGED
@@ -23,7 +23,8 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
|
|
23
23
|
config :columns, :validate => :array, :default => []
|
24
24
|
|
25
25
|
# Define the column separator value. If this is not specified, the default
|
26
|
-
# is a comma `,`.
|
26
|
+
# is a comma `,`. If you want to define a tabulation as a separator, you need
|
27
|
+
# to set the value to the actual tab character and not `\t`.
|
27
28
|
# Optional.
|
28
29
|
config :separator, :validate => :string, :default => ","
|
29
30
|
|
@@ -51,11 +52,18 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
|
|
51
52
|
# [source,ruby]
|
52
53
|
# filter {
|
53
54
|
# csv {
|
54
|
-
# convert => {
|
55
|
+
# convert => {
|
56
|
+
# "column1" => "integer"
|
57
|
+
# "column2" => "boolean"
|
58
|
+
# }
|
55
59
|
# }
|
56
60
|
# }
|
57
61
|
config :convert, :validate => :hash, :default => {}
|
58
62
|
|
63
|
+
# Define whether column names should be auto-detected from the header column or not.
|
64
|
+
# Defaults to false.
|
65
|
+
config :autodetect_column_names, :validate => :boolean, :default => false
|
66
|
+
|
59
67
|
CONVERTERS = {
|
60
68
|
:integer => lambda do |value|
|
61
69
|
CSV::Converters[:integer].call(value)
|
@@ -66,11 +74,13 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
|
|
66
74
|
end,
|
67
75
|
|
68
76
|
:date => lambda do |value|
|
69
|
-
CSV::Converters[:date].call(value)
|
77
|
+
result = CSV::Converters[:date].call(value)
|
78
|
+
result.is_a?(Date) ? LogStash::Timestamp.new(result.to_time) : result
|
70
79
|
end,
|
71
80
|
|
72
81
|
:date_time => lambda do |value|
|
73
|
-
CSV::Converters[:date_time].call(value)
|
82
|
+
result = CSV::Converters[:date_time].call(value)
|
83
|
+
result.is_a?(DateTime) ? LogStash::Timestamp.new(result.to_time) : result
|
74
84
|
end,
|
75
85
|
|
76
86
|
:boolean => lambda do |value|
|
@@ -112,6 +122,12 @@ class LogStash::Filters::CSV < LogStash::Filters::Base
|
|
112
122
|
begin
|
113
123
|
values = CSV.parse_line(source, :col_sep => @separator, :quote_char => @quote_char)
|
114
124
|
|
125
|
+
if (@autodetect_column_names && @columns.empty?)
|
126
|
+
@columns = values
|
127
|
+
event.cancel
|
128
|
+
return
|
129
|
+
end
|
130
|
+
|
115
131
|
values.each_index do |i|
|
116
132
|
unless (@skip_empty_columns && (values[i].nil? || values[i].empty?))
|
117
133
|
unless ignore_field?(i)
|
data/logstash-filter-csv.gemspec
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
|
3
3
|
s.name = 'logstash-filter-csv'
|
4
|
-
s.version = '3.0.2'
|
4
|
+
s.version = '3.0.3'
|
5
5
|
s.licenses = ['Apache License (2.0)']
|
6
6
|
s.summary = "The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names)."
|
7
7
|
s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
|
@@ -11,7 +11,7 @@ Gem::Specification.new do |s|
|
|
11
11
|
s.require_paths = ["lib"]
|
12
12
|
|
13
13
|
# Files
|
14
|
-
s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION"]
|
14
|
+
s.files = Dir["lib/**/*","spec/**/*","*.gemspec","*.md","CONTRIBUTORS","Gemfile","LICENSE","NOTICE.TXT", "vendor/jar-dependencies/**/*.jar", "vendor/jar-dependencies/**/*.rb", "VERSION", "docs/**/*"]
|
15
15
|
|
16
16
|
# Tests
|
17
17
|
s.test_files = s.files.grep(%r{^(test|spec|features)/})
|
data/spec/filters/csv_spec.rb
CHANGED
@@ -20,7 +20,7 @@ describe LogStash::Filters::CSV do
|
|
20
20
|
|
21
21
|
it "should register" do
|
22
22
|
input = LogStash::Plugin.lookup("filter", "csv").new(config)
|
23
|
-
expect {input.register}.to raise_error
|
23
|
+
expect {input.register}.to raise_error(LogStash::ConfigurationError)
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end
|
@@ -232,18 +232,64 @@ describe LogStash::Filters::CSV do
|
|
232
232
|
describe "using field convertion" do
|
233
233
|
|
234
234
|
let(:config) do
|
235
|
-
{
|
235
|
+
{
|
236
|
+
"convert" => {
|
237
|
+
"column1" => "integer",
|
238
|
+
"column3" => "boolean",
|
239
|
+
"column4" => "float",
|
240
|
+
"column5" => "date",
|
241
|
+
"column6" => "date_time",
|
242
|
+
"column7" => "date",
|
243
|
+
"column8" => "date_time",
|
244
|
+
}
|
245
|
+
}
|
236
246
|
end
|
237
|
-
|
247
|
+
# 2017-06-01,2001-02-03T04:05:06+07:00
|
248
|
+
let(:doc) { "1234,bird,false,3.14159265359,2017-06-01,2001-02-03 04:05:06,invalid_date,invalid_date_time" }
|
238
249
|
let(:event) { LogStash::Event.new("message" => doc) }
|
239
250
|
|
240
|
-
it "
|
251
|
+
it "converts to integer" do
|
241
252
|
plugin.filter(event)
|
242
253
|
expect(event.get("column1")).to eq(1234)
|
254
|
+
end
|
255
|
+
|
256
|
+
it "does not convert without converter" do
|
257
|
+
plugin.filter(event)
|
243
258
|
expect(event.get("column2")).to eq("bird")
|
259
|
+
end
|
260
|
+
|
261
|
+
it "converts to boolean" do
|
262
|
+
plugin.filter(event)
|
244
263
|
expect(event.get("column3")).to eq(false)
|
245
264
|
end
|
246
265
|
|
266
|
+
it "converts to float" do
|
267
|
+
plugin.filter(event)
|
268
|
+
expect(event.get("column4")).to eq(3.14159265359)
|
269
|
+
end
|
270
|
+
|
271
|
+
it "converts to date" do
|
272
|
+
plugin.filter(event)
|
273
|
+
expect(event.get("column5")).to be_a(LogStash::Timestamp)
|
274
|
+
expect(event.get("column5").to_s).to eq(LogStash::Timestamp.new(Date.parse("2017-06-01").to_time).to_s)
|
275
|
+
end
|
276
|
+
|
277
|
+
it "converts to date_time" do
|
278
|
+
plugin.filter(event)
|
279
|
+
expect(event.get("column6")).to be_a(LogStash::Timestamp)
|
280
|
+
expect(event.get("column6").to_s).to eq(LogStash::Timestamp.new(DateTime.parse("2001-02-03 04:05:06").to_time).to_s)
|
281
|
+
end
|
282
|
+
|
283
|
+
it "tries to converts to date but return original" do
|
284
|
+
plugin.filter(event)
|
285
|
+
expect(event.get("column7")).to eq("invalid_date")
|
286
|
+
end
|
287
|
+
|
288
|
+
it "tries to converts to date_time but return original" do
|
289
|
+
plugin.filter(event)
|
290
|
+
expect(event.get("column8")).to eq("invalid_date_time")
|
291
|
+
end
|
292
|
+
|
247
293
|
context "when using column names" do
|
248
294
|
|
249
295
|
let(:config) do
|
@@ -259,5 +305,21 @@ describe LogStash::Filters::CSV do
|
|
259
305
|
end
|
260
306
|
end
|
261
307
|
end
|
308
|
+
|
309
|
+
describe "given autodetect option" do
|
310
|
+
let(:header) { LogStash::Event.new("message" => "first,last,address") }
|
311
|
+
let(:doc) { "big,bird,sesame street" }
|
312
|
+
let(:config) do
|
313
|
+
{ "autodetect_column_names" => true }
|
314
|
+
end
|
315
|
+
|
316
|
+
it "extract all the values with the autodetected header" do
|
317
|
+
plugin.filter(header)
|
318
|
+
plugin.filter(event)
|
319
|
+
expect(event.get("first")).to eq("big")
|
320
|
+
expect(event.get("last")).to eq("bird")
|
321
|
+
expect(event.get("address")).to eq("sesame street")
|
322
|
+
end
|
323
|
+
end
|
262
324
|
end
|
263
325
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logstash-filter-csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.2
|
4
|
+
version: 3.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Elastic
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2017-05-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- LICENSE
|
57
57
|
- NOTICE.TXT
|
58
58
|
- README.md
|
59
|
+
- docs/index.asciidoc
|
59
60
|
- lib/logstash/filters/csv.rb
|
60
61
|
- logstash-filter-csv.gemspec
|
61
62
|
- spec/filters/csv_spec.rb
|
@@ -81,7 +82,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
81
82
|
version: '0'
|
82
83
|
requirements: []
|
83
84
|
rubyforge_project:
|
84
|
-
rubygems_version: 2.
|
85
|
+
rubygems_version: 2.4.8
|
85
86
|
signing_key:
|
86
87
|
specification_version: 4
|
87
88
|
summary: The CSV filter takes an event field containing CSV data, parses it, and stores it as individual fields (can optionally specify the names).
|