logstash-codec-json_stream 0.0.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: c0e43af36dd5cac97f6d09ef5cb84974e1d3892d
4
- data.tar.gz: 518a4e627e794a2235f484efac865884b9ad1c92
2
+ SHA256:
3
+ metadata.gz: b844cfe3444a066af5ab9b7c0da943da2822cdd490c4b5c9bee8c2889c95cb3c
4
+ data.tar.gz: a40e7766d69b01e601fdf4231b013c65c1db10e8e824b1ab2ff9fafa25dab5b6
5
5
  SHA512:
6
- metadata.gz: 0c9686b8e0ef66c0a42456382ad4c0526b3807aeb3a39e52d0529e66f06519141b485b3d746027370c333ae8416c73d886d6e0c1c8dc133dd9462f47e1541e6f
7
- data.tar.gz: 6786835c54bb2af7e91cfe41232595bcc5baa044633b6ae0b411698d366d9dc8243207e3a98d8aee032232878581c79135a38c6d6fdcd964140b293993e73cdf
6
+ metadata.gz: cd984ba16e049cfedd692ad99f76462acc2ff493b359a8e3a09b8530af0cb1fcec49ef00d2823a7b5fe2221a37536b237275b89514a2fcc3da64173c47ba7636
7
+ data.tar.gz: 75252622493230be43d863a66e99a7e3fbb1f219d0f1bf8571c4f6686fe780825b1b5eaa497de6b45516a04bf143b928c3f1168fed623f2dce272d7bbf9e3fc3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ## 1.0.0
2
+ - Add spec tests thanks to johannesthoenes4000 & thomasklinger123
3
+ - Add a better parsing method thanks to johannesthoenes4000
1
4
  ## 0.0.1
2
5
  - Forked from Json_lines codec by elastic @elastic: Thank you
3
6
  - Added bracket counter to extract jsons.
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Logstash Plugin
2
2
 
3
- [![Travis Build Status](https://travis-ci.org/logstash-plugins/logstash-codec-json_lines.svg)](https://travis-ci.org/logstash-plugins/logstash-codec-json_lines)
3
+ [![Travis Build Status](https://travis-ci.org/cherweg/logstash-codec-json_stream.svg?branch=master)](https://travis-ci.org/cherweg/logstash-codec-json_stream)
4
4
 
5
5
  This is a plugin for [Logstash](https://github.com/elastic/logstash).
6
6
 
@@ -1,20 +1,18 @@
1
1
  # encoding: utf-8
2
2
  require "logstash/codecs/base"
3
3
  require "logstash/util/charset"
4
- require "logstash/util/buftok"
5
4
  require "logstash/json"
6
5
 
6
+
7
7
  # This codec will decode streamed JSON that is not delimited.
8
8
  # Encoding will emit a single JSON string ending in a `@delimiter`
9
9
 
10
10
  class LogStash::Codecs::JSONStream < LogStash::Codecs::Base
11
+
11
12
  config_name "json_stream"
12
13
 
13
14
  config :charset, :validate => ::Encoding.name_list, :default => "UTF-8"
14
15
 
15
- # Change the delimiter that separates lines
16
- config :delimiter, :validate => :string, :default => "\n"
17
-
18
16
  public
19
17
 
20
18
  def register
@@ -22,33 +20,25 @@ class LogStash::Codecs::JSONStream < LogStash::Codecs::Base
22
20
  @converter.logger = @logger
23
21
  end
24
22
 
25
- def decode(data, &block)
26
- io = StringIO.new data
27
-
28
- loop.inject(counter: 0, string: '') do |acc|
29
- char = io.getc
30
-
31
- break if char.nil? # EOF
32
- next acc if acc[:counter].zero? && char != '{' # between objects
33
-
34
- acc[:string] << char
35
-
36
- if char == '}' && (acc[:counter] -= 1).zero?
37
- # ⇓⇓⇓ # CALLBACK, feel free to JSON.parse here
38
- parse(@converter.convert(acc[:string].gsub(/\p{Space}+/, ' ')), &block)
39
- next {counter: 0, string: ''} # from scratch
40
- end
41
-
42
- acc.tap do |result|
43
- result[:counter] += 1 if char == '{'
44
- end
45
- end
23
+ def decode(concatenated_json, &block)
24
+ decode_unsafe(concatenated_json, &block)
25
+ rescue LogStash::Json::ParserError => e
26
+ @logger.error("JSON parse error for json stream / concatenated json, original data now in message field", :error => e, :data => concatenated_json)
27
+ yield LogStash::Event.new("message" => concatenated_json, "tags" => ["_jsonparsefailure"])
28
+ rescue StandardError => e
29
+ # This should NEVER happen. But hubris has been the cause of many pipeline-breaking things.
30
+ # If something bad should happen we just don't want to crash logstash here.
31
+ @logger.error(
32
+ "An unexpected error occurred parsing JSON data",
33
+ :data => concatenated_json,
34
+ :message => e.message,
35
+ :class => e.class.name,
36
+ :backtrace => e.backtrace
37
+ )
46
38
  end
47
39
 
48
40
  def encode(event)
49
- # Tack on a @delimiter for now because previously most of logstash's JSON
50
- # outputs emitted one per line, and whitespace is OK in json.
51
- @on_event.call(event, "#{event.to_json}#{@delimiter}")
41
+ @logger.error("Encoding is not supported by 'concatenated_json' plugin yet")
52
42
  end
53
43
 
54
44
  def flush(&block)
@@ -56,26 +46,10 @@ class LogStash::Codecs::JSONStream < LogStash::Codecs::Base
56
46
  end
57
47
 
58
48
  private
59
-
60
- # from_json_parse uses the Event#from_json method to deserialize and directly produce events
61
- def from_json_parse(json, &block)
62
- LogStash::Event.from_json(json).each { |event| yield event }
63
- rescue LogStash::Json::ParserError => e
64
- @logger.warn("JSON parse error, original data now in message field", :error => e, :data => json)
65
- yield LogStash::Event.new("message" => json, "tags" => ["_jsonparsefailure"])
66
- end
67
-
68
- # legacy_parse uses the LogStash::Json class to deserialize json
69
- def legacy_parse(json, &block)
70
- # ignore empty/blank lines which LogStash::Json#load returns as nil
71
- o = LogStash::Json.load(json)
72
- yield(LogStash::Event.new(o)) if o
73
- rescue LogStash::Json::ParserError => e
74
- @logger.warn("JSON parse error, original data now in message field", :error => e, :data => json)
75
- yield LogStash::Event.new("message" => json, "tags" => ["_jsonparsefailure"])
49
+ def decode_unsafe(concatenated_json)
50
+ array_json = @converter.convert("[#{concatenated_json.gsub('}{', '},{')}]")
51
+ LogStash::Json.load(array_json).each do |decoded_event|
52
+ yield(LogStash::Event.new(decoded_event))
53
+ end
76
54
  end
77
-
78
- # keep compatibility with all v2.x distributions. only in 2.3 will the Event#from_json method be introduced
79
- # and we need to keep compatibility for all v2 releases.
80
- alias_method :parse, LogStash::Event.respond_to?(:from_json) ? :from_json_parse : :legacy_parse
81
55
  end
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
 
3
3
  s.name = 'logstash-codec-json_stream'
4
- s.version = '0.0.2'
4
+ s.version = '1.0.0'
5
5
  s.licenses = ['Apache License (2.0)']
6
6
  s.summary = "Reads and writes non JSON Streams"
7
7
  s.description = "This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program"
@@ -0,0 +1,91 @@
1
+ # encoding: utf-8
2
+ require "logstash/devutils/rspec/spec_helper"
3
+ require "logstash/codecs/json_stream"
4
+ require "logstash/event"
5
+ require "logstash/json"
6
+ require "insist"
7
+
8
+ describe LogStash::Codecs::JSONStream do
9
+
10
+ class LogStash::Codecs::JSONStream
11
+ public :decode_unsafe # use method without error logging for better visibility of errors
12
+ end
13
+
14
+ let(:codec_options) { {} }
15
+
16
+ context "default parser choice" do
17
+ subject do
18
+ LogStash::Codecs::JSONStream.new(codec_options)
19
+ end
20
+
21
+ it "should read multiple events" do
22
+ events = events_from_string(<<-EOS
23
+ {"messageType": "CONTROL_MESSAGE", "message": "foo"}
24
+ {"messageType": "DATA_MESSAGE", "logGroup": "testing", "logEvents": [
25
+ {"id": "4711", "@timestamp": "2018-06-18T13:36:25.484+00:00", "message": "{\\"tasks\\": \\"READING\\"}"},
26
+ {"id": "1848", "@timestamp": "1989-11-09T23:59:25.484+02:00", "message": "{\\"tasks\\": \\"WRITING\\"}"}
27
+ ]}
28
+ EOS
29
+ )
30
+ insist { events.size } == 2
31
+
32
+ control_event = events[0]
33
+ data_event = events[1]
34
+
35
+ insist { control_event.is_a? LogStash::Event }
36
+ insist { control_event.get("messageType") } == "CONTROL_MESSAGE"
37
+ insist { control_event.get("message") } == "foo"
38
+
39
+ insist { data_event.is_a? LogStash::Event }
40
+ insist { data_event.get("messageType") } == "DATA_MESSAGE"
41
+ insist { data_event.get("logGroup") } == "testing"
42
+ insist { data_event.get("logEvents").size } == 2
43
+
44
+ insist { data_event.get("logEvents")[0]['id'] } == '4711'
45
+ insist { data_event.get("logEvents")[0]['@timestamp'] } == '2018-06-18T13:36:25.484+00:00'
46
+ insist { data_event.get("logEvents")[0]['message'] } == '{"tasks": "READING"}'
47
+
48
+ insist { data_event.get("logEvents")[1]['id'] } == '1848'
49
+ insist { data_event.get("logEvents")[1]['@timestamp'] } == '1989-11-09T23:59:25.484+02:00'
50
+ insist { data_event.get("logEvents")[1]['message'] } == '{"tasks": "WRITING"}'
51
+ end
52
+
53
+ it "should read multiple events from data" do
54
+ events = events_from_file('log-stream.valid-line-formatted')
55
+ insist { events.size } == 5
56
+
57
+ events.each do |event|
58
+ insist { event.is_a? LogStash::Event }
59
+ insist { event.get("logGroup") } == "test-core"
60
+ insist { event.get("messageType") } == "DATA_MESSAGE"
61
+ insist { event.get("logEvents").size } != 0
62
+ event.get("logEvents").each do |event|
63
+ insist { event["id"] } != nil
64
+ insist { event["message"] } != nil
65
+ end
66
+ end
67
+ end
68
+
69
+ it "should not fail with stacktrace" do
70
+ events = events_from_file('log-stream.minimal-failure-formatted')
71
+ insist { events.size } == 1
72
+
73
+ insist { events[0].is_a? LogStash::Event }
74
+ insist { events[0].get("logEvents").size } == 1
75
+ insist { events[0].get("logEvents")[0]['message'] =~ /Failed at: $\{springMacroRequestContext.getMessag\.\.\./ }
76
+ end
77
+ end
78
+
79
+ private
80
+ def events_from_file fixture_logfile_name
81
+ data = IO.read(File.join(File.dirname(__FILE__), "../../fixtures/#{fixture_logfile_name}"))
82
+ events_from_string data
83
+ end
84
+
85
+ def events_from_string data
86
+ events = []
87
+ data_without_formatting = data.gsub(/(\n|\s{2,})/, '')
88
+ subject.decode_unsafe(data_without_formatting) { |event| events << event }
89
+ events
90
+ end
91
+ end
metadata CHANGED
@@ -1,22 +1,22 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-codec-json_stream
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christian Herweg
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-04-23 00:00:00.000000000 Z
11
+ date: 2018-06-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
15
15
  requirements:
16
- - - '>='
16
+ - - ">="
17
17
  - !ruby/object:Gem::Version
18
18
  version: '1.60'
19
- - - <=
19
+ - - "<="
20
20
  - !ruby/object:Gem::Version
21
21
  version: '2.99'
22
22
  name: logstash-core-plugin-api
@@ -24,16 +24,16 @@ dependencies:
24
24
  type: :runtime
25
25
  version_requirements: !ruby/object:Gem::Requirement
26
26
  requirements:
27
- - - '>='
27
+ - - ">="
28
28
  - !ruby/object:Gem::Version
29
29
  version: '1.60'
30
- - - <=
30
+ - - "<="
31
31
  - !ruby/object:Gem::Version
32
32
  version: '2.99'
33
33
  - !ruby/object:Gem::Dependency
34
34
  requirement: !ruby/object:Gem::Requirement
35
35
  requirements:
36
- - - '>='
36
+ - - ">="
37
37
  - !ruby/object:Gem::Version
38
38
  version: 2.1.0
39
39
  name: logstash-codec-line
@@ -41,13 +41,13 @@ dependencies:
41
41
  type: :runtime
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
- - - '>='
44
+ - - ">="
45
45
  - !ruby/object:Gem::Version
46
46
  version: 2.1.0
47
47
  - !ruby/object:Gem::Dependency
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  requirements:
50
- - - '>='
50
+ - - ">="
51
51
  - !ruby/object:Gem::Version
52
52
  version: '0'
53
53
  name: logstash-devutils
@@ -55,10 +55,12 @@ dependencies:
55
55
  type: :development
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
- - - '>='
58
+ - - ">="
59
59
  - !ruby/object:Gem::Version
60
60
  version: '0'
61
- description: This gem is a Logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This gem is not a stand-alone program
61
+ description: This gem is a Logstash plugin required to be installed on top of the
62
+ Logstash core pipeline using $LS_HOME/bin/logstash-plugin install gemname. This
63
+ gem is not a stand-alone program
62
64
  email: christian.herweg@gmail.com
63
65
  executables: []
64
66
  extensions: []
@@ -73,7 +75,7 @@ files:
73
75
  - docs/index.asciidoc
74
76
  - lib/logstash/codecs/json_stream.rb
75
77
  - logstash-codec-json_stream.gemspec
76
- - spec/codecs/json_lines_spec.rb
78
+ - spec/codecs/json_stream_spec.rb
77
79
  homepage: https://github.com/cherweg/logstash-codec-json_stream
78
80
  licenses:
79
81
  - Apache License (2.0)
@@ -86,19 +88,19 @@ require_paths:
86
88
  - lib
87
89
  required_ruby_version: !ruby/object:Gem::Requirement
88
90
  requirements:
89
- - - '>='
91
+ - - ">="
90
92
  - !ruby/object:Gem::Version
91
93
  version: '0'
92
94
  required_rubygems_version: !ruby/object:Gem::Requirement
93
95
  requirements:
94
- - - '>='
96
+ - - ">="
95
97
  - !ruby/object:Gem::Version
96
98
  version: '0'
97
99
  requirements: []
98
100
  rubyforge_project:
99
- rubygems_version: 2.4.5
101
+ rubygems_version: 2.6.13
100
102
  signing_key:
101
103
  specification_version: 4
102
104
  summary: Reads and writes non JSON Streams
103
105
  test_files:
104
- - spec/codecs/json_lines_spec.rb
106
+ - spec/codecs/json_stream_spec.rb
@@ -1,237 +0,0 @@
1
- # encoding: utf-8
2
- require "logstash/devutils/rspec/spec_helper"
3
- require "logstash/codecs/json_lines"
4
- require "logstash/event"
5
- require "logstash/json"
6
- require "insist"
7
-
8
- describe LogStash::Codecs::JSONLines do
9
-
10
- let(:codec_options) { {} }
11
-
12
- shared_examples :codec do
13
-
14
- context "#decode" do
15
- it "should return an event from json data" do
16
- data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}}
17
- subject.decode(LogStash::Json.dump(data) + "\n") do |event|
18
- insist { event.is_a? LogStash::Event }
19
- insist { event.get("foo") } == data["foo"]
20
- insist { event.get("baz") } == data["baz"]
21
- insist { event.get("bah") } == data["bah"]
22
- end
23
- end
24
-
25
- it "should return an event from json data when a newline is received" do
26
- data = {"foo" => "bar", "baz" => {"bah" => ["a","b","c"]}}
27
- subject.decode(LogStash::Json.dump(data)) do |event|
28
- insist {false}
29
- end
30
- subject.decode("\n") do |event|
31
- insist { event.is_a? LogStash::Event }
32
- insist { event.get("foo") } == data["foo"]
33
- insist { event.get("baz") } == data["baz"]
34
- insist { event.get("bah") } == data["bah"]
35
- end
36
- end
37
-
38
- context "when using custom delimiter" do
39
- let(:delimiter) { "|" }
40
- let(:line) { "{\"hey\":1}|{\"hey\":2}|{\"hey\":3}|" }
41
- let(:codec_options) { { "delimiter" => delimiter } }
42
-
43
- it "should decode multiple lines separated by the delimiter" do
44
- result = []
45
- subject.decode(line) { |event| result << event }
46
- expect(result.size).to eq(3)
47
- expect(result[0].get("hey")).to eq(1)
48
- expect(result[1].get("hey")).to eq(2)
49
- expect(result[2].get("hey")).to eq(3)
50
- end
51
- end
52
-
53
- context "processing plain text" do
54
- it "falls back to plain text" do
55
- decoded = false
56
- subject.decode("something that isn't json\n") do |event|
57
- decoded = true
58
- insist { event.is_a?(LogStash::Event) }
59
- insist { event.get("message") } == "something that isn't json"
60
- insist { event.get("tags") }.include?("_jsonparsefailure")
61
- end
62
- insist { decoded } == true
63
- end
64
- end
65
-
66
- context "processing weird binary blobs" do
67
- it "falls back to plain text and doesn't crash (LOGSTASH-1595)" do
68
- decoded = false
69
- blob = (128..255).to_a.pack("C*").force_encoding("ASCII-8BIT")
70
- subject.decode(blob)
71
- subject.decode("\n") do |event|
72
- decoded = true
73
- insist { event.is_a?(LogStash::Event) }
74
- insist { event.get("message").encoding.to_s } == "UTF-8"
75
- end
76
- insist { decoded } == true
77
- end
78
- end
79
-
80
- context "when json could not be parsed" do
81
- let(:message) { "random_message\n" }
82
-
83
- it "add the failure tag" do
84
- subject.decode(message) do |event|
85
- expect(event).to include "tags"
86
- end
87
- end
88
-
89
- it "uses an array to store the tags" do
90
- subject.decode(message) do |event|
91
- expect(event.get('tags')).to be_a Array
92
- end
93
- end
94
-
95
- it "add a json parser failure tag" do
96
- subject.decode(message) do |event|
97
- expect(event.get('tags')).to include "_jsonparsefailure"
98
- end
99
- end
100
- end
101
-
102
- context "blank lines" do
103
- let(:collector) { Array.new }
104
-
105
- it "should ignore bare blanks" do
106
- subject.decode("\n\n") do |event|
107
- collector.push(event)
108
- end
109
- expect(collector.size).to eq(0)
110
- end
111
-
112
- it "should ignore in between blank lines" do
113
- subject.decode("\n{\"a\":1}\n\n{\"b\":2}\n\n") do |event|
114
- collector.push(event)
115
- end
116
- expect(collector.size).to eq(2)
117
- end
118
- end
119
-
120
- end
121
-
122
- context "#encode" do
123
- let(:data) { { LogStash::Event::TIMESTAMP => "2015-12-07T11:37:00.000Z", "foo" => "bar", "baz" => {"bah" => ["a","b","c"]}} }
124
- let(:event) { LogStash::Event.new(data) }
125
-
126
- it "should return json data" do
127
- got_event = false
128
- subject.on_event do |e, d|
129
- insist { d } == "#{LogStash::Event.new(data).to_json}\n"
130
- insist { LogStash::Json.load(d)["foo"] } == data["foo"]
131
- insist { LogStash::Json.load(d)["baz"] } == data["baz"]
132
- insist { LogStash::Json.load(d)["bah"] } == data["bah"]
133
- got_event = true
134
- end
135
- subject.encode(event)
136
- insist { got_event }
137
- end
138
-
139
- context "when using custom delimiter" do
140
- let(:delimiter) { "|" }
141
- let(:codec_options) { { "delimiter" => delimiter } }
142
-
143
- it "should decode multiple lines separated by the delimiter" do
144
- subject.on_event do |e, d|
145
- insist { d } == "#{LogStash::Event.new(data).to_json}#{delimiter}"
146
- end
147
- subject.encode(event)
148
- end
149
- end
150
- end
151
-
152
- context 'reading from a simulated multiline json file without last newline' do
153
- let(:input) do
154
- %{{"field": "value1"}
155
- {"field": "value2"}}
156
- end
157
-
158
- let(:collector) { Array.new }
159
-
160
- it 'should generate one event' do
161
- subject.decode(input) do |event|
162
- collector.push(event)
163
- end
164
- expect(collector.size).to eq(1)
165
- expect(collector.first.get('field')).to eq('value1')
166
- end
167
- end
168
-
169
- context 'reading from a simulated multiline json file with last newline' do
170
- let(:input) do
171
- %{{"field": "value1"}
172
- {"field": "value2"}
173
- }
174
- end
175
-
176
- let(:collector) { Array.new }
177
-
178
- it 'should generate two events' do
179
- subject.decode(input) do |event|
180
- collector.push(event)
181
- end
182
- expect(collector.size).to eq(2)
183
- expect(collector.first.get('field')).to eq('value1')
184
- expect(collector.last.get('field')).to eq('value2')
185
- end
186
- end
187
-
188
- end
189
-
190
- context "forcing legacy parsing" do
191
- it_behaves_like :codec do
192
- subject do
193
- # register method is called in the constructor
194
- LogStash::Codecs::JSONLines.new(codec_options)
195
- end
196
-
197
- before(:each) do
198
- # stub codec parse method to force use of the legacy parser.
199
- # this is very implementation specific but I am not sure how
200
- # this can be tested otherwise.
201
- allow(subject).to receive(:parse) do |line, &block|
202
- subject.send(:legacy_parse, line, &block)
203
- end
204
- end
205
- end
206
- end
207
-
208
- context "default parser choice" do
209
- # here we cannot force the use of the Event#from_json since if this test is run in the
210
- # legacy context (no Java Event) it will fail but if in the new context, it will be picked up.
211
- it_behaves_like :codec do
212
- subject do
213
- # register method is called in the constructor
214
- LogStash::Codecs::JSONLines.new(codec_options)
215
- end
216
- end
217
-
218
- context "flush" do
219
- subject do
220
- LogStash::Codecs::JSONLines.new(codec_options)
221
- end
222
-
223
- let(:input) { "{\"foo\":\"bar\"}" }
224
-
225
- it "should flush buffered data'" do
226
- result = []
227
- subject.decode(input) { |e| result << e }
228
- expect(result.size).to eq(0)
229
-
230
- subject.flush { |e| result << e }
231
- expect(result.size).to eq(1)
232
-
233
- expect(result[0].get("foo")).to eq("bar")
234
- end
235
- end
236
- end
237
- end