logstash-codec-csv 0.1.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +5 -0
- data/LICENSE +1 -1
- data/docs/index.asciidoc +23 -12
- data/lib/logstash/codecs/csv.rb +96 -86
- data/logstash-codec-csv.gemspec +2 -2
- data/spec/codecs/csv_spec.rb +99 -46
- metadata +16 -8
checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-
-  metadata.gz:
-  data.tar.gz:
+SHA256:
+  metadata.gz: ea5265ba1dbb2589596a6679118b058d9f73db36c548d8454b2e63a533808908
+  data.tar.gz: 45ddf7937e0f918f72318af5eb99cd3994811ca5f624c5d88071c2da4830790c
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fba5feae76e970343fe6541d569cd5701f93d227186c60b13da1e93f41c3f4f24626701257c76169ca94e74a8c56a945a98576f5e169ef7f266e5ee12c8feb4e
+  data.tar.gz: f333ec425b38c6b5d83ea8cbeaaefc032bf2b0ec6de1580b9d08ba0561127f609742fa676408ce46ac366d8cf41d6cc76c9b952ebde55c9d9ee314a921821122
data/CHANGELOG.md CHANGED

@@ -1,3 +1,8 @@
+## 1.0.0
+- Fixed dependencies to work with logstash v6 and up. Overhauled to match features of the CSV Filter. Improved spec coverage [#4](https://github.com/logstash-plugins/logstash-codec-csv/pull/4)
+## 0.1.5
+- Fixed asciidoc formatting for example [#3](https://github.com/logstash-plugins/logstash-codec-csv/pull/3)
+
 ## 0.1.4
 - Fix some documentation issues
 
data/LICENSE CHANGED

@@ -1,4 +1,4 @@
-Copyright (c) 2012-
+Copyright (c) 2012-2018 Elasticsearch <http://www.elasticsearch.org>
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
data/docs/index.asciidoc CHANGED

@@ -20,7 +20,7 @@ include::{include_path}/plugin_header.asciidoc[]
 
 ==== Description
 
-
+The csv codec takes CSV data, parses it and passes it along.
 
 [id="plugins-{type}s-{plugin}-options"]
 ==== Csv Codec Configuration Options

@@ -28,6 +28,7 @@ include::{include_path}/plugin_header.asciidoc[]
 [cols="<,<,<",options="header",]
 |=======================================================================
 |Setting |Input type|Required
+| <<plugins-{type}s-{plugin}-autodetect_column_names>> |<<boolean,boolean>>|No
 | <<plugins-{type}s-{plugin}-autogenerate_column_names>> |<<boolean,boolean>>|No
 | <<plugins-{type}s-{plugin}-charset>> |<<string,string>>, one of `["ASCII-8BIT", "UTF-8", "US-ASCII", "Big5", "Big5-HKSCS", "Big5-UAO", "CP949", "Emacs-Mule", "EUC-JP", "EUC-KR", "EUC-TW", "GB2312", "GB18030", "GBK", "ISO-8859-1", "ISO-8859-2", "ISO-8859-3", "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8", "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "ISO-8859-13", "ISO-8859-14", "ISO-8859-15", "ISO-8859-16", "KOI8-R", "KOI8-U", "Shift_JIS", "UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE", "Windows-31J", "Windows-1250", "Windows-1251", "Windows-1252", "IBM437", "IBM737", "IBM775", "CP850", "IBM852", "CP852", "IBM855", "CP855", "IBM857", "IBM860", "IBM861", "IBM862", "IBM863", "IBM864", "IBM865", "IBM866", "IBM869", "Windows-1258", "GB1988", "macCentEuro", "macCroatian", "macCyrillic", "macGreek", "macIceland", "macRoman", "macRomania", "macThai", "macTurkish", "macUkraine", "CP950", "CP951", "IBM037", "stateless-ISO-2022-JP", "eucJP-ms", "CP51932", "EUC-JIS-2004", "GB12345", "ISO-2022-JP", "ISO-2022-JP-2", "CP50220", "CP50221", "Windows-1256", "Windows-1253", "Windows-1255", "Windows-1254", "TIS-620", "Windows-874", "Windows-1257", "MacJapanese", "UTF-7", "UTF8-MAC", "UTF-16", "UTF-32", "UTF8-DoCoMo", "SJIS-DoCoMo", "UTF8-KDDI", "SJIS-KDDI", "ISO-2022-JP-KDDI", "stateless-ISO-2022-JP-KDDI", "UTF8-SoftBank", "SJIS-SoftBank", "BINARY", "CP437", "CP737", "CP775", "IBM850", "CP857", "CP860", "CP861", "CP862", "CP863", "CP864", "CP865", "CP866", "CP869", "CP1258", "Big5-HKSCS:2008", "ebcdic-cp-us", "eucJP", "euc-jp-ms", "EUC-JISX0213", "eucKR", "eucTW", "EUC-CN", "eucCN", "CP936", "ISO2022-JP", "ISO2022-JP2", "ISO8859-1", "ISO8859-2", "ISO8859-3", "ISO8859-4", "ISO8859-5", "ISO8859-6", "CP1256", "ISO8859-7", "CP1253", "ISO8859-8", "CP1255", "ISO8859-9", "CP1254", "ISO8859-10", "ISO8859-11", "CP874", "ISO8859-13", "CP1257", "ISO8859-14", "ISO8859-15", "ISO8859-16", "CP878", "MacJapan", "ASCII", "ANSI_X3.4-1968", "646", "CP65000", "CP65001", "UTF-8-MAC", "UTF-8-HFS", "UCS-2BE", "UCS-4BE", "UCS-4LE", "CP932", "csWindows31J", "SJIS", "PCK", "CP1250", "CP1251", "CP1252", "external", "locale"]`|No
 | <<plugins-{type}s-{plugin}-columns>> |<<array,array>>|No

@@ -40,13 +41,22 @@ include::{include_path}/plugin_header.asciidoc[]
 
 
 
+[id="plugins-{type}s-{plugin}-autodetect_column_names"]
+===== `autodetect_column_names`
+
+* Value type is <<boolean,boolean>>
+* Default value is `false`
+
+Define whether column names should be auto-detected from the header column or not.
+Defaults to false.
+
 [id="plugins-{type}s-{plugin}-autogenerate_column_names"]
 ===== `autogenerate_column_names`
 
 * Value type is <<boolean,boolean>>
 * Default value is `true`
 
-Define whether column names should autogenerated or not.
+Define whether column names should be autogenerated or not.
 Defaults to true. If set to false, columns not having a header specified will not be parsed.
 
 [id="plugins-{type}s-{plugin}-charset"]

@@ -65,12 +75,14 @@ The character encoding used in this codec. Examples include "UTF-8" and
 * Value type is <<array,array>>
 * Default value is `[]`
 
+**When decoding:**
 Define a list of column names (in the order they appear in the CSV,
 as if it were a header line). If `columns` is not configured, or there
 are not enough columns specified, the default column names are
-"column1", "column2", etc.
-
-
+"column1", "column2", etc.
+
+**When encoding:**
+List of fields names to include in the encoded CSV, in the order listed.
 
 [id="plugins-{type}s-{plugin}-convert"]
 ===== `convert`

@@ -79,9 +91,10 @@ in the data than specified in this column list, extra columns will be auto-numbered.
 * Default value is `{}`
 
 Define a set of datatype conversions to be applied to columns.
-Possible conversions are integer
+Possible conversions are: `integer`, `float`, `date`, `date_time`, `boolean`
+
 
-
+*Example*
 [source,ruby]
     filter {
      csv {

@@ -95,10 +108,8 @@ Possible conversions are integer, float, date, date_time, boolean
 * Value type is <<boolean,boolean>>
 * Default value is `false`
 
-
-
-be reset on demand, useful for example when dealing with new files in the file input
-or new request in the http_poller. Default => false
+When **encoding** in an output plugin, include headers in the encoded CSV
+once per codec lifecyle (not for every event). Default => false
 
 [id="plugins-{type}s-{plugin}-quote_char"]
 ===== `quote_char`

@@ -127,6 +138,6 @@ Optional.
 * Default value is `false`
 
 Define whether empty columns should be skipped.
-Defaults to false. If set to true, columns containing no value will not
+Defaults to false. If set to true, columns containing no value will not be included.
 
 
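For orientation, the options documented above compose in an ordinary pipeline definition. A minimal sketch (not taken from the plugin docs; the stdin input and the "duration" field name are illustrative assumptions only):

    input {
      stdin {
        codec => csv {
          # take field names from the first line instead of column1, column2, ...
          autodetect_column_names => true
          # drop fields whose value is empty
          skip_empty_columns => true
          # cast one of the detected columns to a float
          convert => { "duration" => "float" }
        }
      }
    }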
data/lib/logstash/codecs/csv.rb CHANGED

@@ -7,12 +7,13 @@ class LogStash::Codecs::CSV < LogStash::Codecs::Base
 
   config_name "csv"
 
-  #
-  #
-  #
-  #
-  #
-  #
+  # When decoding:
+  # Define a list of column names (in the order they appear in the CSV,
+  # as if it were a header line). If `columns` is not configured, or there
+  # are not enough columns specified, the default column names are
+  # "column1", "column2", etc.
+  # When encoding:
+  # List of fields names to include in the encoded CSV, in the order listed.
   config :columns, :validate => :array, :default => []
 
   # Define the column separator value. If this is not specified, the default

@@ -25,20 +26,22 @@ class LogStash::Codecs::CSV < LogStash::Codecs::Base
   # Optional.
   config :quote_char, :validate => :string, :default => '"'
 
-  #
-  #
-  # be reset on demand, useful for example when dealing with new files in the file input
-  # or new request in the http_poller. Default => false
+  # When encoding in an output plugin, include headers in the encoded CSV
+  # once per codec lifecyle (not for every event). Default => false
   config :include_headers, :validate => :boolean, :default => false
 
-  # Define whether column names should autogenerated or not.
+  # Define whether column names should be autogenerated or not.
   # Defaults to true. If set to false, columns not having a header specified will not be parsed.
   config :autogenerate_column_names, :validate => :boolean, :default => true
 
   # Define whether empty columns should be skipped.
-  # Defaults to false. If set to true, columns containing no value will not
+  # Defaults to false. If set to true, columns containing no value will not be included.
   config :skip_empty_columns, :validate => :boolean, :default => false
 
+  # Define whether column names should be auto-detected from the header column or not.
+  # Defaults to false.
+  config :autodetect_column_names, :validate => :boolean, :default => false
+
   # Define a set of datatype conversions to be applied to columns.
   # Possible conversions are integer, float, date, date_time, boolean
   #

@@ -51,113 +54,120 @@ class LogStash::Codecs::CSV < LogStash::Codecs::Base
   # }
   config :convert, :validate => :hash, :default => {}
 
-  ##
-  # List of valid conversion types used for the convert option
-  ##
-  VALID_CONVERT_TYPES = [ "integer", "float", "date", "date_time", "boolean" ].freeze
-
-
   # The character encoding used in this codec. Examples include "UTF-8" and
   # "CP1252".
   config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"
 
+  CONVERTERS = {
+    :integer => lambda do |value|
+      CSV::Converters[:integer].call(value)
+    end,
+
+    :float => lambda do |value|
+      CSV::Converters[:float].call(value)
+    end,
+
+    :date => lambda do |value|
+      result = CSV::Converters[:date].call(value)
+      result.is_a?(Date) ? LogStash::Timestamp.new(result.to_time) : result
+    end,
+
+    :date_time => lambda do |value|
+      result = CSV::Converters[:date_time].call(value)
+      result.is_a?(DateTime) ? LogStash::Timestamp.new(result.to_time) : result
+    end,
+
+    :boolean => lambda do |value|
+      value = value.strip.downcase
+      return false if value == "false"
+      return true if value == "true"
+      return value
+    end
+  }
+  CONVERTERS.default = lambda {|v| v}
+  CONVERTERS.freeze
+
   def register
     @converter = LogStash::Util::Charset.new(@charset)
     @converter.logger = @logger
 
     # validate conversion types to be the valid ones.
-    @convert.
-
-
-
+    bad_types = @convert.values.select do |type|
+      !CONVERTERS.has_key?(type.to_sym)
+    end.uniq
+    raise(LogStash::ConfigurationError, "Invalid conversion types: #{bad_types.join(', ')}") unless bad_types.empty?
+
+    # @convert_symbols contains the symbolized types to avoid symbol conversion in the transform method
+    @convert_symbols = @convert.each_with_object({}){|(k, v), result| result[k] = v.to_sym}
+
+    # if the zero byte character is entered in the config, set the value
+    if (@quote_char == "\\x00")
+      @quote_char = "\x00"
     end
 
-    @
-    @options = { :col_sep => @separator, :quote_char => @quote_char }
+    @logger.debug? && @logger.debug("CSV parsing options", :col_sep => @separator, :quote_char => @quote_char)
   end
 
   def decode(data)
     data = @converter.convert(data)
     begin
-      values = CSV.parse_line(data, @
-
-
-      @
-
-
-
-
-
-
-
-
-      field_name =
-      value = fields[1]
+      values = CSV.parse_line(data, :col_sep => @separator, :quote_char => @quote_char)
+
+      if (@autodetect_column_names && @columns.empty?)
+        @columns = values
+        @logger.debug? && @logger.debug("Auto detected the following columns", :columns => @columns.inspect)
+        return
+      end
+
+      decoded = {}
+      values.each_index do |i|
+        unless (@skip_empty_columns && (values[i].nil? || values[i].empty?))
+          unless ignore_field?(i)
+            field_name = @columns[i] || "column#{i + 1}"
+            decoded[field_name] = transform(field_name, values[i])
           end
-      next unless field_name
-      decoded[field_name] = if should_transform?(field_name)
-        transform(field_name, value)
-      else
-        value
-      end
         end
-      yield LogStash::Event.new(decoded) if block_given?
       end
+
+      yield LogStash::Event.new(decoded)
     rescue CSV::MalformedCSVError => e
-      @logger.
-      yield LogStash::Event.new("message" => data, "tags" => ["_csvparsefailure"])
+      @logger.error("CSV parse failure. Falling back to plain-text", :error => e, :data => data)
+      yield LogStash::Event.new("message" => data, "tags" => ["_csvparsefailure"])
    end
   end
 
   def encode(event)
-
-
-
+    if @include_headers
+      csv_data = CSV.generate_line(select_keys(event), :col_sep => @separator, :quote_char => @quote_char, :headers => true)
+      @on_event.call(event, csv_data)
+
+      # output headers only once per codec lifecycle
+      @include_headers = false
+    end
 
-
-    @
-    @options.delete(:headers)
+    csv_data = CSV.generate_line(select_values(event), :col_sep => @separator, :quote_char => @quote_char)
+    @on_event.call(event, csv_data)
   end
 
   private
 
-  def
-
+  def select_values(event)
+    if @columns.empty?
+      event.to_hash.values
+    else
+      @columns.map {|column| event.get(column)}
+    end
   end
 
-  def
-
+  def select_keys(event)
+    @columns.empty? ? event.to_hash.keys : @columns
   end
 
-  def
-
-    converters[transformation].call(value)
+  def ignore_field?(index)
+    !@columns[index] && !@autogenerate_column_names
   end
 
-  def
-  @
-    :integer => lambda do |value|
-      CSV::Converters[:integer].call(value)
-    end,
-    :float => lambda do |value|
-      CSV::Converters[:float].call(value)
-
-    end,
-    :date => lambda do |value|
-      CSV::Converters[:date].call(value)
-
-    end,
-    :date_time => lambda do |value|
-      CSV::Converters[:date_time].call(value)
-    end,
-    :boolean => lambda do |value|
-      value = value.strip.downcase
-      return false if value == "false"
-      return true if value == "true"
-      return value
-    end
-  }
+  def transform(field_name, value)
+    CONVERTERS[@convert_symbols[field_name]].call(value)
   end
-
-end # class LogStash::Codecs::Plain
+end
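Taken together, the reworked decode path (optional column auto-detection plus the per-field CONVERTERS table) and the rewritten encode path (select_keys/select_values with a one-time header line) can be exercised directly, in the same way the spec file below drives the codec. A minimal sketch, assuming the plugin and its Logstash runtime are loadable; the "bytes"/"host" field names and sample values are illustrative only:

    require "logstash/codecs/csv"

    # Decoding: one CSV line becomes one event; "bytes" is converted to an integer.
    codec = LogStash::Codecs::CSV.new("columns" => ["bytes", "host"],
                                      "convert" => { "bytes" => "integer" })
    codec.register
    codec.decode("1234,example.org") do |event|
      event.get("bytes")   # => 1234
      event.get("host")    # => "example.org"
    end

    # Encoding: only the listed columns are emitted, with the header line
    # produced once per codec instance because include_headers is flipped off
    # after the first event.
    encoder = LogStash::Codecs::CSV.new("columns" => ["bytes", "host"],
                                        "include_headers" => true)
    encoder.register
    encoder.on_event { |event, data| print data }
    encoder.encode(LogStash::Event.new("bytes" => 1234, "host" => "example.org"))
    # prints "bytes,host\n" followed by "1234,example.org\n"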
data/logstash-codec-csv.gemspec CHANGED

@@ -1,7 +1,7 @@
 Gem::Specification.new do |s|
 
   s.name = 'logstash-codec-csv'
-  s.version = '0.
+  s.version = '1.0.0'
   s.licenses = ['Apache License (2.0)']
   s.summary = "The csv codec take CSV data, parses it and passes it away"
   s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"

@@ -20,7 +20,7 @@ Gem::Specification.new do |s|
   s.metadata = { "logstash_plugin" => "true", "logstash_group" => "codec" }
 
   # Gem dependencies
-  s.add_runtime_dependency "logstash-core-plugin-api", "
+  s.add_runtime_dependency "logstash-core-plugin-api", ">= 1.60", "<= 2.99"
 
   s.add_development_dependency 'logstash-devutils'
 end
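As the description field above states, the codec is not a stand-alone program: it is installed into an existing Logstash with `$LS_HOME/bin/logstash-plugin install logstash-codec-csv`. The widened `logstash-core-plugin-api` constraint (`>= 1.60`, `<= 2.99`) is the dependency fix the changelog describes for Logstash v6 and up.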
data/spec/codecs/csv_spec.rb CHANGED

@@ -12,14 +12,13 @@ describe LogStash::Codecs::CSV do
   end
 
   describe "decode" do
-
     let(:data) { "big,bird,sesame street" }
 
     it "return an event from CSV data" do
       codec.decode(data) do |event|
-        expect(event
-        expect(event
-        expect(event
+        expect(event.get("column1")).to eq("big")
+        expect(event.get("column2")).to eq("bird")
+        expect(event.get("column3")).to eq("sesame street")
       end
     end
 

@@ -31,9 +30,9 @@ describe LogStash::Codecs::CSV do
 
     it "extract all the values" do
       codec.decode(data) do |event|
-        expect(event
-        expect(event
-        expect(event
+        expect(event.get("first")).to eq("big")
+        expect(event.get("last")).to eq("bird")
+        expect(event.get("address")).to eq("sesame street")
       end
     end
 

@@ -48,9 +47,9 @@ describe LogStash::Codecs::CSV do
 
      it "extract all the values" do
        codec.decode(data) do |event|
-          expect(event
+          expect(event.get("custom1")).to eq("val1")
          expect(event.to_hash).not_to include("custom2")
-          expect(event
+          expect(event.get("custom3")).to eq("val3")
        end
      end
    end

@@ -65,9 +64,9 @@ describe LogStash::Codecs::CSV do
 
      it "extract all the values" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
-          expect(event
+          expect(event.get("custom1")).to eq("val1")
+          expect(event.get("custom2")).to eq("val2")
+          expect(event.get("column3")).to be_falsey
        end
      end
    end

@@ -83,8 +82,8 @@ describe LogStash::Codecs::CSV do
 
      it "return an event from CSV data" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
+          expect(event.get("column1")).to eq("big,bird")
+          expect(event.get("column2")).to eq("sesame street")
        end
      end
    end

@@ -98,9 +97,9 @@ describe LogStash::Codecs::CSV do
 
      it "return an event from CSV data" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
-          expect(event
+          expect(event.get("column1")).to eq("big")
+          expect(event.get("column2")).to eq("bird")
+          expect(event.get("column3")).to eq("sesame street")
        end
      end
 

@@ -110,9 +109,9 @@ describe LogStash::Codecs::CSV do
 
      it "return an event from CSV data" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
-          expect(event
+          expect(event.get("column1")).to eq("big")
+          expect(event.get("column2")).to eq("bird")
+          expect(event.get("column3")).to eq("sesame, street")
        end
      end
    end

@@ -125,9 +124,9 @@ describe LogStash::Codecs::CSV do
 
      it "return an event from CSV data" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
-          expect(event
+          expect(event.get("column1")).to eq("big")
+          expect(event.get("column2")).to eq("bird")
+          expect(event.get("column3")).to eq('"sesame" street')
        end
      end
    end

@@ -144,28 +143,15 @@ describe LogStash::Codecs::CSV do
     end
 
     let(:config) do
-      { "
+      { "autodetect_column_names" => true }
     end
 
     it "include header information when requested" do
       codec.decode(data[0]) # Read the headers
       codec.decode(data[1]) do |event|
-        expect(event
-        expect(event
-        expect(event
-      end
-    end
-
-    it "reset headers and fetch the new ones" do
-      data.each do |row|
-        codec.decode(row)
-      end
-      codec.reset
-      codec.decode(new_data[0]) # set the new headers
-      codec.decode(new_data[1]) do |event|
-        expect(event["host"]).to eq("example.com")
-        expect(event["country"]).to eq("germany")
-        expect(event["city"]).to eq("berlin")
+        expect(event.get("size")).to eq("big")
+        expect(event.get("animal")).to eq("bird")
+        expect(event.get("movie")).to eq("sesame street")
       end
     end
   end

@@ -179,9 +165,9 @@ describe LogStash::Codecs::CSV do
 
      it "get converted values to the expected type" do
        codec.decode(data) do |event|
-          expect(event
-          expect(event
-          expect(event
+          expect(event.get("column1")).to eq(1234)
+          expect(event.get("column2")).to eq("bird")
+          expect(event.get("column3")).to eq(false)
        end
      end
 

@@ -194,13 +180,80 @@ describe LogStash::Codecs::CSV do
 
        it "get converted values to the expected type" do
          codec.decode(data) do |event|
-            expect(event
-            expect(event
-            expect(event
+            expect(event.get("custom1")).to eq(1234)
+            expect(event.get("custom2")).to eq("bird")
+            expect(event.get("custom3")).to eq(false)
          end
        end
      end
    end
+  end
+
+  describe "encode" do
+    context "not including headers" do
+      let(:event) { LogStash::Event.new({"f1" => "v1", "f2" => "v2"}) }
+
+      context "without columns" do
+        let(:config) do
+          { "include_headers" => false, "columns" => [] }
+        end
 
+        it "should encode to single CSV line" do
+          codec.on_event do |e, d|
+            expect(d.chomp.split(",").sort).to eq("v1,v2,1,#{event.timestamp}".split(",").sort)
+          end
+          codec.encode(event)
+        end
+      end
+
+      context "with columns" do
+        let(:config) do
+          { "include_headers" => false, "columns" => ["f1", "f2"] }
+        end
+
+        it "should encode to single CSV line" do
+          codec.on_event do |e, d|
+            expect(d).to eq("v1,v2\n")
+          end
+          codec.encode(event)
+        end
+      end
+    end
+
+    context "including headers" do
+      let(:event) { LogStash::Event.new({"f1" => "v1", "f2" => "v2"}) }
+
+      context "without columns" do
+        let(:config) do
+          { "include_headers" => true, "columns" => [] }
+        end
+
+        it "should encode to two CSV line" do
+          lines = []
+          codec.on_event do |e, d|
+            lines << d
+          end
+          codec.encode(event)
+          expect(lines[0].chomp.split(",").sort).to eq("f1,f2,@version,@timestamp".split(",").sort)
+          expect(lines[1].chomp.split(",").sort).to eq("v1,v2,1,#{event.timestamp}".split(",").sort)
+        end
+      end
+
+      context "with columns" do
+        let(:config) do
+          { "include_headers" => true, "columns" => ["f1", "f2"] }
+        end
+
+        it "should encode to two CSV line" do
+          lines = []
+          codec.on_event do |e, d|
+            lines << d
+          end
+          codec.encode(event)
+          expect(lines[0]).to eq("f1,f2\n")
+          expect(lines[1]).to eq("v1,v2\n")
+        end
+      end
+    end
   end
 end
metadata CHANGED

@@ -1,29 +1,35 @@
 --- !ruby/object:Gem::Specification
 name: logstash-codec-csv
 version: !ruby/object:Gem::Version
-  version: 0.
+  version: 1.0.0
 platform: ruby
 authors:
 - Elasticsearch
 autorequire:
 bindir: bin
 cert_chain: []
-date:
+date: 2020-02-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '1.60'
+    - - "<="
       - !ruby/object:Gem::Version
-        version: '
+        version: '2.99'
   name: logstash-core-plugin-api
   prerelease: false
   type: :runtime
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '1.60'
+    - - "<="
       - !ruby/object:Gem::Version
-        version: '
+        version: '2.99'
 - !ruby/object:Gem::Dependency
   requirement: !ruby/object:Gem::Requirement
     requirements:

@@ -38,7 +44,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
-description: This gem is a Logstash plugin required to be installed on top of the
+description: This gem is a Logstash plugin required to be installed on top of the
+  Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This
+  gem is not a stand-alone program
 email: info@elasticsearch.com
 executables: []
 extensions: []

@@ -75,7 +83,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
         version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.
+rubygems_version: 2.6.13
 signing_key:
 specification_version: 4
 summary: The csv codec take CSV data, parses it and passes it away