logstash-input-azureblob 0.9.12-java
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +2 -0
- data/LICENSE +17 -0
- data/README.md +253 -0
- data/lib/com/microsoft/json-parser.rb +202 -0
- data/lib/logstash-input-azureblob_jars.rb +10 -0
- data/lib/logstash/inputs/azureblob.rb +500 -0
- data/lib/org/glassfish/javax.json/1.1/javax.json-1.1.jar +0 -0
- data/logstash-input-azureblob.gemspec +32 -0
- data/spec/com/microsoft/json-parser_spec.rb +280 -0
- data/spec/inputs/azureblob_spec.rb +324 -0
- metadata +165 -0
data/lib/org/glassfish/javax.json/1.1/javax.json-1.1.jar: Binary file
data/logstash-input-azureblob.gemspec
@@ -0,0 +1,32 @@
+Gem::Specification.new do |s|
+  s.name = 'logstash-input-azureblob'
+  s.version = '0.9.12'
+  s.platform = "java"
+  s.licenses = ['Apache License (2.0)']
+  s.summary = 'This plugin collects Microsoft Azure Diagnostics data from Azure Storage Blobs.'
+  s.description = 'This gem is a Logstash plugin. It reads and parses data from Azure Storage Blobs.'
+  s.homepage = 'https://github.com/Azure/azure-diagnostics-tools'
+  s.authors = ['Microsoft Corporation']
+  s.email = 'azdiag@microsoft.com'
+  s.require_paths = ['lib']
+
+  # Files
+  s.files = Dir['lib/**/*','spec/**/*','vendor/**/*','*.gemspec','*.md','Gemfile','LICENSE']
+  # Tests
+  s.test_files = s.files.grep(%r{^(test|spec|features)/})
+
+  # Special flag to let us know this is actually a logstash plugin
+  s.metadata = { "logstash_plugin" => "true", "logstash_group" => "input" }
+
+  # Gem dependencies
+  s.add_runtime_dependency "logstash-core-plugin-api", '>= 1.60', '<= 2.99'
+  s.add_runtime_dependency 'logstash-codec-json_lines'
+  s.add_runtime_dependency 'stud', '>= 0.0.22'
+  s.add_runtime_dependency 'azure-storage', '~> 0.12.3.preview'
+  s.add_development_dependency 'logstash-devutils'
+  s.add_development_dependency 'logging'
+
+  # Jar dependencies
+  s.requirements << "jar 'org.glassfish:javax.json', '1.1'"
+  s.add_runtime_dependency 'jar-dependencies'
+end
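The `s.requirements` entry is the jar-dependencies convention for declaring a Maven artifact, and the `data/lib/logstash-input-azureblob_jars.rb` file in the listing above (+10 lines) is the generated loader that resolves it at runtime. The generated file's actual contents are not displayed in this diff; a sketch of the shape such a jar-dependencies loader typically takes:

# Sketch only: typical shape of a jar-dependencies generated *_jars.rb
# loader. The gem's real file is not shown on this page.
begin
  require 'jar_dependencies'
rescue LoadError
  # Fall back to requiring the vendored jar directly.
  require 'org/glassfish/javax.json/1.1/javax.json-1.1.jar'
end

if defined? Jars
  require_jar('org.glassfish', 'javax.json', '1.1')
end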
data/spec/com/microsoft/json-parser_spec.rb
@@ -0,0 +1,280 @@
+require "logstash/devutils/rspec/spec_helper"
+require "logging"
+require "com/microsoft/json-parser"
+
+describe JsonParser do
+  before(:each) do
+    @logger = Logging.logger(STDOUT)
+    @logger.level = :debug
+    @linear_reader = spy
+
+    @on_content = double
+    @on_error = double
+  end
+
+  def construct(json_str)
+    @linear_reader_index = 0
+    allow(@linear_reader).to receive(:read) do ->(){
+      start_index = @linear_reader_index
+      @linear_reader_index = @linear_reader_index + 42
+      return json_str[start_index..@linear_reader_index - 1], @linear_reader_index < json_str.length ? true : false
+    }.call
+    end
+    return JsonParser.new(@logger, @linear_reader)
+  end
+
+  it 'can parse a complete JSON' do
+    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    parser = construct(json_str)
+
+    expect(@on_error).to_not receive(:call)
+    expect(@on_content).to receive(:call).with(json_str).once
+
+    parser.parse(@on_content, @on_error)
+  end
+
+  it 'can parse multiple JSON objects' do
+    json_strings = [
+      "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}",
+      "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}",
+      "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}",
+      "\n\r{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}",
+      " {\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}",
+      " {\"abc\" :\"xyz\"}"
+    ]
+    content = ""
+    json_strings.each do |str|
+      content << str
+      expect(@on_content).to receive(:call).with(str).ordered
+    end
+    expect(@on_error).to_not receive(:call)
+
+    parser = construct(content)
+
+    parser.parse(@on_content, @on_error)
+  end
+
+  it 'will ignore regular text' do
+    not_a_json = "not a json"
+    parser = construct(not_a_json)
+    skipped_bytes = 0
+    expect(@on_content).to_not receive(:call)
+
+    received_malformed_str = ""
+    allow(@on_error).to receive(:call) do |malformed_json|
+      received_malformed_str << malformed_json
+    end
+
+    parser.parse(@on_content, @on_error)
+
+    expect(received_malformed_str).to eq(not_a_json)
+  end
+
+  it 'will ignore malformed JSON' do
+    not_a_json = "{\"entity\":{ \"number\":42, \"string\":\"comma is missing here ->\" \"<- here\":null }}"
+    parser = construct(not_a_json)
+    skipped_bytes = 0
+    expect(@on_content).to_not receive(:call)
+
+    received_malformed_str = ""
+    allow(@on_error).to receive(:call) do |malformed_json|
+      received_malformed_str << malformed_json
+    end
+
+    parser.parse(@on_content, @on_error)
+
+    expect(received_malformed_str).to eq(not_a_json)
+  end
+
+  it 'will skip comma between JSONs' do
+    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    not_a_json = ","
+    parser = construct(json_str+not_a_json+json_str)
+
+    expect(@on_content).to receive(:call).with(json_str).once.ordered
+    expect(@on_error).to receive(:call).with(",").once.ordered
+    expect(@on_content).to receive(:call).with(json_str).once.ordered
+
+    parser.parse(@on_content, @on_error)
+  end
+
+  it 'will skip regular text in the middle' do
+    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    not_a_json = "not a json"
+    parser = construct(json_str+not_a_json+json_str)
+
+    expect(@on_content).to receive(:call).with(json_str).once.ordered
+    expect(@on_content).to receive(:call).with(json_str).once.ordered
+
+    received_malformed_str = ""
+    allow(@on_error).to receive(:call) do |malformed_json|
+      received_malformed_str << malformed_json
+    end
+
+    parser.parse(@on_content, @on_error)
+
+    expect(received_malformed_str).to eq(not_a_json)
+  end
+
+  it 'can parse multiple JSON objects in between malformed content' do
+    strings = [
+      [ true, "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}"],
+      [ true, "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}"],
+      [ false, ","],
+      [ true, "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}"],
+      [ false, "some random text \n\r"],
+      [ true, "{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}"],
+      [ false, "{\"entity\":{ \"number\":42, \"string\":\"some string\" \"val\":null }} "],
+      [ true, "{\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}"],
+      [ true, " {\"abc\" :\"xyz\"}"]
+    ]
+    content = ""
+    strings.each do |is_valid_json, str|
+      content << str
+      if is_valid_json
+        expect(@on_content).to receive(:call).with(str).ordered
+      else
+      end
+    end
+    allow(@on_error).to receive(:call)
+
+    parser = construct(content)
+
+    parser.parse(@on_content, @on_error)
+  end
+
+  it 'will batch together malformed content in a single callback' do
+    strings = [
+      [ true, "{\"entity\":{ \"number\":42, \"string\":\"some string\", \"val\":null }}"],
+      [ true, "{\"entity2\":{ \"number2\":422, \"string2\":\"some string2\" }}"],
+      [ false, ","],
+      [ true, "{\"entity3\":{ \"number3\":422, \"string3\":\"some string2\", \"array\":[{\"abc\":\"xyz\"}] }}"],
+      [ false, "some random text \n\r"], # whitespace after malformed data will be part of the malformed string
+      [ true, "{\"entity4\":{ \"number4\":422, \"string4\":\"some string2\", \"empty_array\":[] }}"],
+      [ false, "{\"entity\":{ \"number\":42, \"string\":\"some string\" \"val\":null }} "],
+      [ true, "{\"entity5\":{ \"number5\":422, \"string5\":\"some string2\" }}"],
+      [ true, "\n\r {\"abc\" :\"xyz\"}"] # whitespace after correct jsons will be part of the next json
+    ]
+    content = ""
+    strings.each do |is_valid_json, str|
+      content << str
+      if is_valid_json
+        expect(@on_content).to receive(:call).with(str).ordered
+      else
+        expect(@on_error).to receive(:call).with(str).ordered
+      end
+    end
+
+    parser = construct(content)
+
+    parser.parse(@on_content, @on_error)
+  end
+end
+
+describe StreamReader do
+  before(:each) do
+    @logger = Logging.logger(STDOUT)
+    @logger.level = :debug
+
+    @linear_reader = double
+    @stream_reader = StreamReader.new(@logger, @linear_reader)
+  end
+
+  it 'does not support mark' do
+    expect(@stream_reader.markSupported).to eq(false)
+  end
+
+  it 'can read full stream' do
+    full_content = "entire content"
+    input_buffer = Java::char[full_content.length].new
+
+    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once
+
+    @stream_reader.read(input_buffer, 0, full_content.length)
+
+    expect(java::lang::String.new(input_buffer)).to eq(full_content)
+  end
+
+  it 'reads until requested buffer is filled' do
+    full_content = "entire content"
+    input_buffer = Java::char[full_content.length].new
+
+    expect(@linear_reader).to receive(:read).twice.and_return([full_content[0..full_content.length/2], true],[full_content[full_content.length/2 + 1..-1], true])
+
+    @stream_reader.read(input_buffer, 0, full_content.length)
+
+    expect(java::lang::String.new(input_buffer)).to eq(full_content)
+  end
+
+  it 'does not call the read callback when buffer length is 0' do
+    expect(@linear_reader).to_not receive(:read)
+
+    @stream_reader.read(nil, 0, 0)
+  end
+
+  it 'caches if it reads ahead' do
+    full_content = "entire content"
+    input_buffer = Java::char[full_content.length].new
+
+    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once
+
+    (0..full_content.length - 1).each do |i|
+      @stream_reader.read(input_buffer, i, 1)
+    end
+
+    expect(java::lang::String.new(input_buffer)).to eq(full_content)
+  end
+
+  it 'returns -1 when read callback returns empty and there are no more bytes' do
+    expect(@linear_reader).to receive(:read).and_return(["", false]).once
+
+    expect(@stream_reader.read(nil, 0, 42)).to eq(-1)
+  end
+
+  it 'will store stream buffer' do
+    full_content = "entire content"
+    bytes_to_read = 4
+    input_buffer = Java::char[bytes_to_read].new
+
+    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once
+
+    @stream_reader.read(input_buffer, 0, bytes_to_read)
+
+    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length)
+    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read)
+    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content)
+  end
+
+  it 'will do nothing when drop_stream is called but the until_offset is greater than stream index' do
+    full_content = "entire content"
+    bytes_to_read = 4
+    input_buffer = Java::char[bytes_to_read].new
+
+    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once
+
+    @stream_reader.read(input_buffer, 0, bytes_to_read)
+
+    @stream_reader.drop_stream(@stream_reader.get_cached_stream_index + 1)
+
+    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length)
+    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read)
+    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content)
+  end
+
+  it 'will trim buffer stream when drop_stream is called' do
+    full_content = "entire content"
+    bytes_to_read = 4
+    until_offset = bytes_to_read - 2
+    input_buffer = Java::char[bytes_to_read].new
+
+    expect(@linear_reader).to receive(:read).and_return([full_content, false]).once
+
+    @stream_reader.read(input_buffer, 0, bytes_to_read)
+
+    @stream_reader.drop_stream(until_offset)
+
+    expect(@stream_reader.get_cached_stream_length).to eq(full_content.length - until_offset)
+    expect(@stream_reader.get_cached_stream_index).to eq(bytes_to_read - until_offset)
+    expect(@stream_reader.get_stream_buffer(0,-1)).to eq(full_content[until_offset..-1])
+  end
+end
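Read together, these specs pin down the parser's contract: the injected linear reader's `read` returns a `[chunk, more_data_available]` pair (the fake above serves 42-byte slices), `parse` invokes `on_content` once per well-formed top-level JSON value, and consecutive malformed bytes are batched into a single `on_error` call. A minimal usage sketch under those assumptions; the in-memory reader below is illustrative and not part of the gem:

require "logging"
require "com/microsoft/json-parser"

# Illustrative in-memory reader honoring the [chunk, more?] contract the
# specs establish; the plugin's real reader pulls ranges from Azure blobs.
class StringChunkReader
  def initialize(str, chunk_size = 42)
    @str, @pos, @chunk_size = str, 0, chunk_size
  end

  def read
    chunk = @str[@pos, @chunk_size] || ""
    @pos += @chunk_size
    [chunk, @pos < @str.length]
  end
end

logger = Logging.logger(STDOUT)
reader = StringChunkReader.new('{"a":1}garbage{"b":2}')
on_content = ->(json) { logger.info("parsed: #{json}") }   # once per valid JSON
on_error   = ->(bad)  { logger.warn("skipped: #{bad}") }   # malformed run, batched
JsonParser.new(logger, reader).parse(on_content, on_error)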
data/spec/inputs/azureblob_spec.rb
@@ -0,0 +1,324 @@
+require "logstash/devutils/rspec/spec_helper"
+require "logging"
+require "logstash/inputs/azureblob"
+
+class LogStash::Codecs::JSON
+end
+
+describe LogStash::Inputs::LogstashInputAzureblob do
+
+  before(:each) do
+    @logger = Logging.logger(STDOUT)
+    @logger.level = :debug
+
+    @azure_blob_sdk = double
+    @json_codec = double("LogStash::Codecs::JSON", :is_a? => true)
+    @other_codec = double("other codec", :is_a? => false)
+
+    @azureblob_input = LogStash::Inputs::LogstashInputAzureblob.new
+    @azureblob_input.instance_variable_set(:@logger, @logger)
+    @azureblob_input.instance_variable_set(:@file_head_bytes, 0)
+    @azureblob_input.instance_variable_set(:@file_tail_bytes, 0)
+    @azureblob_input.instance_variable_set(:@azure_blob, @azure_blob_sdk)
+    @azureblob_input.instance_variable_set(:@container, double)
+    @azureblob_input.instance_variable_set(:@codec, @other_codec)
+    allow(@azureblob_input).to receive(:update_registry)
+  end
+
+  def set_json_codec
+    @azureblob_input.instance_variable_set(:@codec, @json_codec)
+  end
+
+  def stub_blob(blob_name, content)
+    allow(@azure_blob_sdk).to receive(:get_blob).with(anything(), blob_name, anything()) do |container, blob_name_arg, props|
+      ->(){
+        start_index = 0
+        end_index = -1
+        start_index = props[:start_range] unless props[:start_range].nil?
+        end_index = props[:end_range] unless props[:end_range].nil?
+
+        ret_str = content[start_index..end_index]
+        @logger.debug("get_blob(#{start_index},#{end_index}): |#{ret_str}|")
+        return double, ret_str
+      }.call
+    end
+
+    return double(:name => blob_name, :properties => {
+      :content_length => content.length,
+      :etag => nil
+    })
+  end
+
+  it "can parse basic JSON" do
+    blob_name = "basic_json"
+    json_str = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    set_json_codec()
+
+    blob = stub_blob(blob_name, json_str)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+
+    expect(@json_codec).to receive(:decode).with(json_str).ordered
+    expect(@json_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "can parse multiple JSONs" do
+    blob_name = "multi_json"
+    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    json_str3 = " \n{\"entity3\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    set_json_codec()
+
+    blob = stub_blob(blob_name, json_str1 + json_str2 + json_str3)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+
+    expect(@json_codec).to receive(:decode).with(json_str1).once.ordered
+    expect(@json_codec).to receive(:decode).with(json_str2).once.ordered
+    expect(@json_codec).to receive(:decode).with(json_str3).once.ordered
+    expect(@json_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will parse JSONs from blob start" do
+    blob_name = "non_zero_json_start"
+    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    set_json_codec()
+
+    blob = stub_blob(blob_name, json_str1 + json_str2)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, json_str1.length, nil])
+
+    expect(@json_codec).to receive(:decode).with(json_str2).once.ordered
+    expect(@json_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "can parse out malformed JSONs" do
+    blob_name = "parse_out_malformed"
+    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    malformed_data = [",", "asdgasfgasfg", "{\"entity\"", "}", "{\"broken_json\":{\"a\":2 \"b\":3}}"]
+    set_json_codec()
+
+    malformed_data.each do |malformed|
+      blob = stub_blob(blob_name, json_str1 + malformed + json_str2)
+
+      allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+
+      expect(@json_codec).to receive(:decode).with(json_str1).once.ordered
+      expect(@json_codec).to receive(:decode).with(json_str2).once.ordered
+
+      @azureblob_input.process(nil)
+    end
+  end
+
+  it "can build JSONs with header and tail" do
+    blob_name = "head_tail_json"
+    json_str1 = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}"
+    json_str2 = "{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    already_parsed = "{\"parsed_json\":true}"
+    head = "{\"xyz\":42}{\"entities\" : \n["
+    tail = "\n] }{\"abc\":42}\n"
+    set_json_codec()
+    @azureblob_input.instance_variable_set(:@file_head_bytes, head.length)
+    @azureblob_input.instance_variable_set(:@file_tail_bytes, tail.length)
+
+    blob = stub_blob(blob_name, head + already_parsed + json_str1 + json_str2 + tail)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, (head + already_parsed).length, nil])
+
+    expect(@json_codec).to receive(:decode).with(head + json_str1 + tail).once.ordered
+    expect(@json_codec).to receive(:decode).with(head + json_str2 + tail).once.ordered
+    expect(@json_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will update the registry offset when parsing JSON" do
+    blob_name = "json_end_index"
+    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }},{},{\"a\":2} random text at the end"
+    set_json_codec()
+
+    blob = stub_blob(blob_name, content)
+
+    registry_file_path = ""
+    registry_offset = -1
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+    allow(@json_codec).to receive(:decode).and_return([])
+
+    expect(@azureblob_input).to receive(:update_registry) do |new_registry_item|
+      registry_file_path = new_registry_item.file_path
+      registry_offset = new_registry_item.offset
+    end
+
+    @azureblob_input.process(nil)
+
+    expect(registry_file_path).to eq(blob_name)
+    expect(registry_offset).to eq(content.length)
+  end
+
+  it "can output simple text" do
+    blob_name = "basic_content"
+    content = "some text\nmore text"
+
+    blob = stub_blob(blob_name, content)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+
+    expect(@other_codec).to receive(:decode).with(content).ordered
+    expect(@other_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will add header and tail when the codec is not json" do
+    blob_name = "head_tail_non_json"
+    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}\n{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}"
+    already_parsed = "{\"parsed_json\":true}"
+    head = "{\"xyz\":42}{\"entities\" : \n["
+    tail = "\n] }{\"abc\":42}\n"
+
+    @azureblob_input.instance_variable_set(:@file_head_bytes, head.length)
+    @azureblob_input.instance_variable_set(:@file_tail_bytes, tail.length)
+
+    blob = stub_blob(blob_name, head + already_parsed + content + tail)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, (head + already_parsed).length, nil])
+
+    expect(@other_codec).to receive(:decode).with(head + content + tail).once.ordered
+    expect(@other_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will output content in chunks when the codec is not json" do
+    blob_name = "chunked_content"
+    # same size chunks
+    chunk1 = "first chunk \n|"
+    chunk2 = "second chunk \n"
+    chunk3 = "third chunk \n|"
+    smaller_chunk = "smaller\n"
+    content = chunk1 + chunk2 + chunk3 + smaller_chunk
+
+    blob = stub_blob(blob_name, content)
+    @azureblob_input.instance_variable_set(:@file_chunk_size_bytes, chunk1.length)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+
+    expect(@other_codec).to receive(:decode).with(chunk1).once.ordered
+    expect(@other_codec).to receive(:decode).with(chunk2).once.ordered
+    expect(@other_codec).to receive(:decode).with(chunk3).once.ordered
+    expect(@other_codec).to receive(:decode).with(smaller_chunk).once.ordered
+    expect(@other_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will start from offset index when the codec is not json" do
+    blob_name = "skip_start_index"
+    already_parsed = "===="
+    actual_content = "some text\nmore text"
+
+    blob = stub_blob(blob_name, already_parsed + actual_content)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, already_parsed.length, nil])
+
+    expect(@other_codec).to receive(:decode).with(actual_content).ordered
+    expect(@other_codec).to_not receive(:decode).ordered
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will update the registry offset when the codec is not json" do
+    blob_name = "non_json_end_index"
+    content = "{\"entity\":{ \"number\":42, \"string\":\"some string\" }},{},{\"a\":2} random text at the end"
+
+    blob = stub_blob(blob_name, content)
+
+    registry_file_path = ""
+    registry_offset = -1
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+    allow(@other_codec).to receive(:decode).and_return([])
+
+    expect(@azureblob_input).to receive(:update_registry) do |new_registry_item|
+      registry_file_path = new_registry_item.file_path
+      registry_offset = new_registry_item.offset
+    end
+
+    @azureblob_input.process(nil)
+
+    expect(registry_file_path).to eq(blob_name)
+    expect(registry_offset).to eq(content.length)
+  end
+
+  it "will update registry after n entries" do
+    chunk_size = 5
+    update_count = 3
+    blob_name = "force_registry_offset"
+    entries = [
+      "first chunk \n",
+      "second chunk",
+      "third",
+      "dgsdfgfgfg",
+      "132435436",
+      "dgsdfgfgfg"
+    ]
+    stub_const("LogStash::Inputs::LogstashInputAzureblob::UPDATE_REGISTRY_COUNT", update_count)
+
+    content = ""
+    entries.each do |entry|
+      content << entry[0..chunk_size]
+    end
+
+    blob = stub_blob(blob_name, content)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+    @azureblob_input.instance_variable_set(:@file_chunk_size_bytes, chunk_size)
+    allow(@other_codec).to receive(:decode).and_return([])
+
+    update_registry_count = entries.length / update_count + 1
+    expect(@azureblob_input).to receive(:update_registry).exactly(update_registry_count).times
+
+    @azureblob_input.process(nil)
+  end
+
+  it "will update registry after n entries when the codec is json" do
+    chunk_size = 5
+    update_count = 3
+    blob_name = "force_registry_offset_json"
+    entries = [
+      "{\"entity\":{ \"number\":42, \"string\":\"some string\" }}\n{\"entity2\":{ \"number2\":422, \"string2\":\"some other string\" }}",
+      "invalid",
+      "{\"val\":42}\n ",
+      "{}",
+      "{\"val}",
+      "dgsdfgfgfg"
+    ]
+    set_json_codec()
+    stub_const("LogStash::Inputs::LogstashInputAzureblob::UPDATE_REGISTRY_COUNT", update_count)
+
+    content = ""
+    entries.each do |entry|
+      content << entry
+    end
+
+    blob = stub_blob(blob_name, content)
+
+    allow(@azureblob_input).to receive(:register_for_read).and_return([blob, 0, nil])
+    allow(@json_codec).to receive(:decode).and_return([])
+
+    update_registry_count = entries.length / update_count + 1
+    expect(@azureblob_input).to receive(:update_registry).exactly(update_registry_count).times
+
+    @azureblob_input.process(nil)
+  end
+
+end
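Taken as a whole, the input spec sketches the `process` flow: `register_for_read` yields a blob plus the registry offset to resume from, `get_blob` is issued with `:start_range`/`:end_range` to fetch the optional head, the content in `@file_chunk_size_bytes` chunks, and the optional tail, each assembled piece is handed to the codec's `decode`, and `update_registry` checkpoints the new offset (per the last two examples, at least once every `UPDATE_REGISTRY_COUNT` entries). A condensed, hypothetical rendering of that loop with collaborators passed in explicitly; the real implementation lives in data/lib/logstash/inputs/azureblob.rb, which this diff page does not display:

# Hypothetical condensation of the loop the specs exercise; names mirror
# the spec doubles, not the (unshown) production code.
def process_blob(azure_blob, container, codec, blob, start_index, chunk_size, checkpoint)
  offset = start_index
  while offset < blob.properties[:content_length]
    # Ranged read, matching the :start_range/:end_range handling in stub_blob.
    _props, chunk = azure_blob.get_blob(container, blob.name,
                                        :start_range => offset,
                                        :end_range => offset + chunk_size - 1)
    break if chunk.nil? || chunk.empty?
    codec.decode(chunk)                # one decode per chunk/entry
    offset += chunk.length
    checkpoint.call(blob.name, offset) # update_registry analogue
  end
end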