logstash-input-azureblob 0.9.12-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +253 -0
- data/lib/com/microsoft/json-parser.rb +202 -0
- data/lib/logstash-input-azureblob_jars.rb +10 -0
- data/lib/logstash/inputs/azureblob.rb +500 -0
- data/lib/org/glassfish/javax.json/1.1/javax.json-1.1.jar +0 -0
- data/logstash-input-azureblob.gemspec +32 -0
- data/spec/com/microsoft/json-parser_spec.rb +280 -0
- data/spec/inputs/azureblob_spec.rb +324 -0
- metadata +165 -0
Binary file
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Gem packaging manifest for the logstash-input-azureblob plugin.
# Built for the "java" platform because the plugin runs under JRuby and
# pulls in a Maven jar (javax.json) via jar-dependencies.
Gem::Specification.new do |spec|
  spec.name        = 'logstash-input-azureblob'
  spec.version     = '0.9.12'
  spec.platform    = "java"
  spec.licenses    = ['Apache License (2.0)']
  spec.summary     = 'This plugin collects Microsoft Azure Diagnostics data from Azure Storage Blobs.'
  spec.description = 'This gem is a Logstash plugin. It reads and parses data from Azure Storage Blobs.'
  spec.homepage    = 'https://github.com/Azure/azure-diagnostics-tools'
  spec.authors     = ['Microsoft Corporation']
  spec.email       = 'azdiag@microsoft.com'
  spec.require_paths = ['lib']

  # Files packaged into the gem: code, specs, vendored jars, and top-level docs.
  spec.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
  # Tests are whatever packaged files live under test/, spec/ or features/.
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})

  # Special flag to let us know this is actually a logstash plugin
  spec.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }

  # Gem dependencies
  spec.add_runtime_dependency "logstash-core-plugin-api", '>= 1.60', '<= 2.99'
  spec.add_runtime_dependency 'logstash-codec-json_lines'
  spec.add_runtime_dependency 'stud', '>= 0.0.22'
  spec.add_runtime_dependency 'azure-storage', '~> 0.12.3.preview'
  spec.add_development_dependency 'logstash-devutils'
  spec.add_development_dependency 'logging'

  # Jar dependencies (resolved at install time by jar-dependencies).
  spec.requirements << "jar 'org.glassfish:javax.json', '1.1'"
  spec.add_runtime_dependency 'jar-dependencies'
end
|
@@ -0,0 +1,280 @@
|
|
1
|
+
require "logstash/devutils/rspec/spec_helper"
|
2
|
+
require "logging"
|
3
|
+
require "com/microsoft/json-parser"
|
4
|
+
|
5
|
+
# Specs for JsonParser. Each test feeds content through a stubbed linear
# reader and verifies that complete JSON objects are handed to the
# @on_content callback while malformed stretches are handed to @on_error.
describe JsonParser do
  before(:each) do
    @logger = Logging.logger(STDOUT)
    @logger.level = :debug
    @linear_reader = spy

    # Callbacks passed to JsonParser#parse; expectations are set per test.
    @on_content = double
    @on_error = double
  end

  # Builds a JsonParser whose stubbed reader serves json_str in 42-byte
  # slices. Each read returns [slice, more_data_available] — the boolean is
  # true while there is still unread input left in json_str.
  def construct(json_str)
    @linear_reader_index = 0
    # The lambda is invoked immediately so that `return` can yield the
    # two-element result from inside the stub block.
    allow(@linear_reader).to receive(:read) do ->(){
      start_index = @linear_reader_index
      @linear_reader_index = @linear_reader_index + 42
      return json_str[start_index..@linear_reader_index - 1], @linear_reader_index < json_str.length ? true : false
    }.call
    end
    return JsonParser.new(@logger, @linear_reader)
  end

  it 'can parse a complete JSON' do
    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    parser = construct(json_str)

    expect(@on_error).to_not receive(:call)
    expect(@on_content).to receive(:call).with(json_str).once

    parser.parse(@on_content, @on_error)
  end

  it 'can parse multiple JSON objects' do
    # Covers nested objects, arrays, null values and leading whitespace.
    json_strings = [
      "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}",
      "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}",
      "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}",
      "\n\r{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}",
      " {\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}",
      " {\"abc\" :\"xyz\"}"
    ]
    content = ""
    json_strings.each do |str|
      content << str
      # Each object must be delivered in its original order.
      expect(@on_content).to receive(:call).with(str).ordered
    end
    expect(@on_error).to_not receive(:call)

    parser = construct(content)

    parser.parse(@on_content, @on_error)
  end

  it 'will ignore regular text' do
    not_a_json = "not a json"
    parser = construct(not_a_json)
    skipped_bytes = 0 # NOTE(review): unused local — candidate for removal.
    expect(@on_content).to_not receive(:call)

    # Accumulate everything routed to the error callback and compare at the end.
    received_malformed_str = ""
    allow(@on_error).to receive(:call) do |malformed_json|
      received_malformed_str << malformed_json
    end

    parser.parse(@on_content, @on_error)

    expect(received_malformed_str).to eq(not_a_json)
  end

  it 'will ignore malformed JSON' do
    # Structurally JSON-like but invalid: a comma is missing between members.
    not_a_json = "{\"entity\":{ \"number\":42, \"string\":\"comma is missing here ->\" \"<- here\":null }}"
    parser = construct(not_a_json)
    skipped_bytes = 0 # NOTE(review): unused local — candidate for removal.
    expect(@on_content).to_not receive(:call)

    received_malformed_str = ""
    allow(@on_error).to receive(:call) do |malformed_json|
      received_malformed_str << malformed_json
    end

    parser.parse(@on_content, @on_error)

    expect(received_malformed_str).to eq(not_a_json)
  end

  it 'will skip comma between JSONs' do
    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    not_a_json = ","
    parser = construct(json_str+not_a_json+json_str)

    # The separating comma is reported as an error between the two objects.
    expect(@on_content).to receive(:call).with(json_str).once.ordered
    expect(@on_error).to receive(:call).with(",").once.ordered
    expect(@on_content).to receive(:call).with(json_str).once.ordered

    parser.parse(@on_content, @on_error)
  end

  it 'will skip regular text in the middle' do
    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    not_a_json = "not a json"
    parser = construct(json_str+not_a_json+json_str)

    expect(@on_content).to receive(:call).with(json_str).once.ordered
    expect(@on_content).to receive(:call).with(json_str).once.ordered

    received_malformed_str = ""
    allow(@on_error).to receive(:call) do |malformed_json|
      received_malformed_str << malformed_json
    end

    parser.parse(@on_content, @on_error)

    expect(received_malformed_str).to eq(not_a_json)
  end

  it 'can parse multiple JSON objects in between malformed content' do
    # Each entry is [is_valid_json, text]; only valid entries are expected
    # on @on_content, invalid ones are routed to @on_error (allowed below).
    strings = [
      [ true, "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}"],
      [ true, "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}"],
      [ false, ","],
      [ true, "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}"],
      [ false, "some random text \n\r"],
      [ true, "{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}"],
      [ false, "{\"entity\":{ \"number\":42, \"string\":\"some string\" \"val\":null }} "],
      [ true, "{\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}"],
      [ true, " {\"abc\" :\"xyz\"}"]
    ]
    content = ""
    strings.each do |is_valid_json, str|
      content << str
      if is_valid_json
        expect(@on_content).to receive(:call).with(str).ordered
      else
        # NOTE(review): empty branch — invalid entries are covered by the
        # blanket allow(@on_error) below rather than per-entry expectations.
      end
    end
    allow(@on_error).to receive(:call)

    parser = construct(content)

    parser.parse(@on_content, @on_error)
  end

  it 'will batch together malformed content in a single callback' do
    strings = [
      [ true, "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}"],
      [ true, "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}"],
      [ false, ","],
      [ true, "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}"],
      [ false, "some random text \n\r"], #whitespace after malformed data will be part of the malformed string
      [ true, "{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}"],
      [ false, "{\"entity\":{ \"number\":42, \"string\":\"some string\" \"val\":null }} "],
      [ true, "{\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}"],
      [ true, "\n\r {\"abc\" :\"xyz\"}"] # whitespace after correct jsons will be part of the next json
    ]
    content = ""
    strings.each do |is_valid_json, str|
      content << str
      if is_valid_json
        expect(@on_content).to receive(:call).with(str).ordered
      else
        # Unlike the previous test, each contiguous malformed stretch must
        # arrive as exactly one @on_error call, in order.
        expect(@on_error).to receive(:call).with(str).ordered
      end
    end

    parser = construct(content)

    parser.parse(@on_content, @on_error)
  end
end
|
173
|
+
|
174
|
+
# Specs for StreamReader, the java.io.Reader-style adapter over the linear
# reader callback. These run under JRuby: buffers are Java char[] arrays and
# results are compared via java.lang.String.
describe StreamReader do
  before(:each) do
    @logger = Logging.logger(STDOUT)
    @logger.level = :debug

    @linear_reader = double
    @stream_reader = StreamReader.new(@logger, @linear_reader)
  end

  it 'does not support mark' do
    # Mirrors java.io.Reader#markSupported — this reader is forward-only.
    expect(@stream_reader.markSupported).to eq(false)
  end

  it 'can read full stream' do
    full_content = "entire content"
    input_buffer = Java::char[full_content.length].new

    # Single underlying read; false signals end of stream.
    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once

    @stream_reader.read(input_buffer, 0, full_content.length)

    expect(java::lang::String.new(input_buffer)).to eq(full_content)
  end

  it 'reads until requested buffer is filled' do
    full_content = "entire content"
    input_buffer = Java::char[full_content.length].new

    # Content arrives in two contiguous halves; StreamReader must keep
    # calling read until the requested length is satisfied.
    expect(@linear_reader).to receive(:read).twice.and_return([full_content[0..full_content.length/2], true],[full_content[full_content.length/2 + 1..-1], true])

    @stream_reader.read(input_buffer, 0, full_content.length)

    expect(java::lang::String.new(input_buffer)).to eq(full_content)
  end

  it 'does not call the read callback when buffer length is 0' do
    expect(@linear_reader).to_not receive(:read)

    @stream_reader.read(nil, 0, 0)
  end

  it 'caches if it reads ahead do' do
    full_content = "entire content"
    input_buffer = Java::char[full_content.length].new

    # One underlying read must satisfy many 1-char reads from the cache.
    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once

    (0..full_content.length - 1).each do |i|
      @stream_reader.read(input_buffer, i, 1)
    end

    expect(java::lang::String.new(input_buffer)).to eq(full_content)
  end

  it 'returns -1 when read callback returns empty and there are no more bytes' do
    # -1 is the java.io.Reader end-of-stream convention.
    expect(@linear_reader).to receive(:read).and_return(["", false]).once

    expect(@stream_reader.read(nil, 0, 42)).to eq(-1)
  end

  it 'will store stream buffer' do
    full_content = "entire content"
    bytes_to_read = 4
    input_buffer = Java::char[bytes_to_read].new

    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once

    @stream_reader.read(input_buffer, 0, bytes_to_read)

    # The whole underlying payload is cached; the index marks how far the
    # consumer has read into it.
    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length)
    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read)
    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content)
  end

  it 'will do nothing when drop_stream is called but the until_offset is greater than stream index' do
    full_content = "entire content"
    bytes_to_read = 4
    input_buffer = Java::char[bytes_to_read].new

    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once

    @stream_reader.read(input_buffer, 0, bytes_to_read)

    # Attempting to drop past the consumed index must be a no-op.
    @stream_reader.drop_stream(@stream_reader.get_cached_stream_index + 1)

    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length)
    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read)
    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content)
  end

  it 'will trim buffer stream when drop_stream is called' do
    full_content = "entire content"
    bytes_to_read = 4
    until_offset = bytes_to_read - 2
    input_buffer = Java::char[bytes_to_read].new

    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once

    @stream_reader.read(input_buffer, 0, bytes_to_read)

    @stream_reader.drop_stream(until_offset)

    # After the drop, length and index shrink by until_offset and the buffer
    # starts at the dropped offset.
    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length - until_offset)
    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read - until_offset)
    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content[until_offset..-1])
  end
end
|
@@ -0,0 +1,324 @@
|
|
1
|
+
require "logstash/devutils/rspec/spec_helper"
|
2
|
+
require "logging"
|
3
|
+
require "logstash/inputs/azureblob"
|
4
|
+
|
5
|
+
# Empty stub declaration so the constant LogStash::Codecs::JSON resolves in
# this spec without loading the real logstash-codec-json gem.
# NOTE(review): presumably the input plugin checks `codec.is_a?(LogStash::Codecs::JSON)`
# — the doubles below stub :is_a? accordingly; confirm against azureblob.rb.
class LogStash::Codecs::JSON
end
|
7
|
+
|
8
|
+
# Specs for the azureblob input's #process path. The Azure SDK client and the
# codecs are doubles; plugin configuration is injected via instance_variable_set
# to bypass the Logstash config/registration machinery.
describe LogStash::Inputs::LogstashInputAzureblob do

  before(:each) do
    @logger = Logging.logger(STDOUT)
    @logger.level = :debug

    @azure_blob_sdk = double
    # The plugin branches on codec.is_a?(...) — stub the check on each double.
    @json_codec = double("LogStash::Codecs::JSON", :is_a? => true)
    @other_codec = double("other codec", :is_a? => false)

    @azureblob_input = LogStash::Inputs::LogstashInputAzureblob.new
    @azureblob_input.instance_variable_set(:@logger, @logger)
    @azureblob_input.instance_variable_set(:@file_head_bytes, 0)
    @azureblob_input.instance_variable_set(:@file_tail_bytes, 0)
    @azureblob_input.instance_variable_set(:@azure_blob, @azure_blob_sdk)
    @azureblob_input.instance_variable_set(:@container, double)
    @azureblob_input.instance_variable_set(:@codec, @other_codec)
    allow(@azureblob_input).to receive(:update_registry)
  end

  # Switch the plugin to the JSON codec double for JSON-specific tests.
  def set_json_codec
    @azureblob_input.instance_variable_set(:@codec, @json_codec)
  end

  # Stubs the SDK's get_blob for blob_name so it serves slices of `content`,
  # honoring the :start_range/:end_range options like the real azure-storage
  # client. Returns a blob double exposing :name and :properties.
  def stub_blob(blob_name, content)
    allow(@azure_blob_sdk).to receive(:get_blob).with(anything(), blob_name, anything()) do |container, blob_name_arg, props |
      # Immediately-invoked lambda so `return` can produce the
      # [blob, content] pair expected from get_blob.
      ->(){
        start_index = 0
        end_index = -1
        start_index = props[:start_range] unless props[:start_range].nil?
        end_index = props[:end_range] unless props[:end_range].nil?

        ret_str = content[start_index..end_index]
        @logger.debug("get_blob(#{start_index},#{end_index}): |#{ret_str}|")
        return double, ret_str
      }.call
    end

    return double(:name => blob_name, :properties => {
      :content_length => content.length,
      :etag => nil
    })
  end

  it "can parse basic JSON" do
    blob_name = "basic_json"
    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    set_json_codec()

    blob = stub_blob(blob_name, json_str)

    # register_for_read → [blob, start_offset, gentime]; start at offset 0.
    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])

    expect(@json_codec).to receive(:decode).with(json_str).ordered
    expect(@json_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "can parse multiple JSONs" do
    blob_name = "multi_json"
    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    json_str3 = " \n{\"entity3\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    set_json_codec()

    blob = stub_blob(blob_name, json_str1 + json_str2 + json_str3)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])

    # One decode per JSON object, in order, and nothing after the last one.
    expect(@json_codec).to receive(:decode).with(json_str1).once.ordered
    expect(@json_codec).to receive(:decode).with(json_str2).once.ordered
    expect(@json_codec).to receive(:decode).with(json_str3).once.ordered
    expect(@json_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will parse JSONs from blob start" do
    blob_name = "non_zero_json_start"
    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    set_json_codec()

    blob = stub_blob(blob_name, json_str1 + json_str2)

    # Registry says json_str1 was already consumed; only json_str2 remains.
    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, json_str1.length, nil])

    expect(@json_codec).to receive(:decode).with(json_str2).once.ordered
    expect(@json_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "can parse out malformed JSONs" do
    blob_name = "parse_out_malformed"
    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    # Garbage of various shapes sandwiched between two valid objects.
    malformed_data = [",", "asdgasfgasfg", "{\"entity\"", "}", "{\"broken_json\":{\"a\":2 \"b\":3}}"]
    set_json_codec()

    malformed_data.each do |malformed|
      blob = stub_blob(blob_name, json_str1 + malformed + json_str2)

      allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])

      expect(@json_codec).to receive(:decode).with(json_str1).once.ordered
      expect(@json_codec).to receive(:decode).with(json_str2).once.ordered

      @azureblob_input.process(nil)
    end
  end

  it "can build JSONs with header and tail" do
    blob_name = "head_tail_json"
    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    already_parsed = "{\"parsed_json\":true}"
    head = "{\"xyz\":42}{\"entities\" : \n["
    tail = "\n] }{\"abc\":42}\n"
    set_json_codec()
    @azureblob_input.instance_variable_set(:@file_head_bytes, head.length)
    @azureblob_input.instance_variable_set(:@file_tail_bytes, tail.length)

    blob = stub_blob(blob_name, head + already_parsed + json_str1 + json_str2 + tail)

    # Start past head + already-parsed; head/tail get re-wrapped around each object.
    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, (head + already_parsed).length, nil])

    expect(@json_codec).to receive(:decode).with(head + json_str1 + tail).once.ordered
    expect(@json_codec).to receive(:decode).with(head + json_str2 + tail).once.ordered
    expect(@json_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will update the registry offset when parsing JSON" do
    blob_name = "json_end_index"
    # Trailing random text must still be accounted for in the final offset.
    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }},{},{\"a\":2} random text at the end"
    set_json_codec()

    blob = stub_blob(blob_name, content)

    registry_file_path = ""
    registry_offset = -1

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
    allow(@json_codec).to receive(:decode).and_return([])

    # Capture the registry item written at the end of processing.
    expect(@azureblob_input).to receive(:update_registry) do |new_registry_item|
      registry_file_path = new_registry_item.file_path
      registry_offset = new_registry_item.offset
    end

    @azureblob_input.process(nil)

    expect(registry_file_path).to eq(blob_name)
    expect(registry_offset).to eq(content.length)
  end

  it "can output simple text" do
    blob_name = "basic_content"
    content = "some text\nmore text"

    blob = stub_blob(blob_name, content)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])

    # Non-JSON codec: content is passed through as-is in a single decode.
    expect(@other_codec).to receive(:decode).with(content).ordered
    expect(@other_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will add header and tail when the codec is not json" do
    blob_name = "head_tail_non_json"
    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}\n{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
    already_parsed = "{\"parsed_json\":true}"
    head = "{\"xyz\":42}{\"entities\" : \n["
    tail = "\n] }{\"abc\":42}\n"

    @azureblob_input.instance_variable_set(:@file_head_bytes, head.length)
    @azureblob_input.instance_variable_set(:@file_tail_bytes, tail.length)

    blob = stub_blob(blob_name, head + already_parsed + content + tail)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, (head + already_parsed).length, nil])

    # For non-JSON codecs the whole remainder is wrapped once, not per-object.
    expect(@other_codec).to receive(:decode).with(head + content + tail).once.ordered
    expect(@other_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will output content in chunks when the codec is not json" do
    blob_name = "chunked_content"
    #same size chunks
    chunk1 = "first chunk \n|"
    chunk2 = "second chunk \n"
    chunk3 = "third chunk \n|"
    smaller_chunk = "smaller\n"
    content = chunk1 + chunk2 + chunk3 + smaller_chunk

    blob = stub_blob(blob_name, content)
    @azureblob_input.instance_variable_set(:@file_chunk_size_bytes, chunk1.length)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])

    # Equal-size chunks first, then the short remainder as the final decode.
    expect(@other_codec).to receive(:decode).with(chunk1).once.ordered
    expect(@other_codec).to receive(:decode).with(chunk2).once.ordered
    expect(@other_codec).to receive(:decode).with(chunk3).once.ordered
    expect(@other_codec).to receive(:decode).with(smaller_chunk).once.ordered
    expect(@other_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will start from offset index when the codec is not json" do
    blob_name = "skip_start_index"
    already_parsed = "===="
    actual_content = "some text\nmore text"

    blob = stub_blob(blob_name, already_parsed + actual_content)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, already_parsed.length, nil])

    expect(@other_codec).to receive(:decode).with(actual_content).ordered
    expect(@other_codec).to_not receive(:decode).ordered

    @azureblob_input.process(nil)
  end

  it "will update the registry offset when the codec is not json" do
    blob_name = "non_json_end_index"
    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }},{},{\"a\":2} random text at the end"

    blob = stub_blob(blob_name, content)

    registry_file_path = ""
    registry_offset = -1

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
    allow(@other_codec).to receive(:decode).and_return([])

    expect(@azureblob_input).to receive(:update_registry) do |new_registry_item|
      registry_file_path = new_registry_item.file_path
      registry_offset = new_registry_item.offset
    end

    @azureblob_input.process(nil)

    expect(registry_file_path).to eq(blob_name)
    expect(registry_offset).to eq(content.length)
  end

  it "will update registry after n entries" do
    chunk_size = 5
    update_count = 3
    blob_name = "force_registry_offset"
    entries = [
      "first chunk \n",
      "second chunk",
      "third",
      "dgsdfgfgfg",
      "132435436",
      "dgsdfgfgfg"
    ]
    # Force a registry flush every `update_count` processed chunks.
    stub_const("LogStash::Inputs::LogstashInputAzureblob::UPDATE_REGISTRY_COUNT", update_count)

    content = ""
    entries.each do |entry|
      # NOTE(review): 0..chunk_size is chunk_size + 1 characters — confirm
      # this off-by-one is intended for the fixture.
      content << entry[0..chunk_size]
    end

    blob = stub_blob(blob_name, content)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
    @azureblob_input.instance_variable_set(:@file_chunk_size_bytes, chunk_size)
    allow(@other_codec).to receive(:decode).and_return([])

    # Periodic updates plus the final one at end of blob.
    update_registry_count = entries.length / update_count + 1
    expect(@azureblob_input).to receive(:update_registry).exactly(update_registry_count).times

    @azureblob_input.process(nil)
  end

  it "will update registry after n entries when the codec is json" do
    chunk_size = 5
    update_count = 3
    blob_name = "force_registry_offset_json"
    entries = [
      "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}\n{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}",
      "invalid",
      "{\"val\":42}\n ",
      "{}",
      "{\"val}",
      "dgsdfgfgfg"
    ]
    set_json_codec()
    stub_const("LogStash::Inputs::LogstashInputAzureblob::UPDATE_REGISTRY_COUNT", update_count)

    content = ""
    entries.each do |entry|
      content << entry
    end

    blob = stub_blob(blob_name, content)

    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
    allow(@json_codec).to receive(:decode).and_return([])

    update_registry_count = entries.length / update_count + 1
    expect(@azureblob_input).to receive(:update_registry).exactly(update_registry_count).times

    @azureblob_input.process(nil)
  end

end
|