logstash-input-cloudflare 0.9.5 → 0.9.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/logstash/inputs/cloudflare.rb +117 -70
- data/logstash-input-cloudflare.gemspec +1 -1
- data/spec/inputs/cloudflare_spec.rb +3 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: ce272684c54290cb373b1b2f7632cacda4dc41c7
+  data.tar.gz: f9b2c0a8d0ec6e9ea3b97d4277992dab958c4188
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 0d3a1855c767bbdaf4834b36650db2f443851340915ae814c6946699e13390a2e6ff35f48253ffeb00707a65aa4d3e04daa75d7141556978a37daae0b1bad08f
+  data.tar.gz: c28bc6bc2a081aa88248e0a941536ef5ace9bc1f1c714b9fefb643c2bb959ebabcbb458d13c4af403e48d0e3fd2b0a57b78885c59a229f133c2eebd6880f77e3
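
Note: the SHA1 and SHA512 entries above are digests of the two archives packed inside the published .gem file. As a hedged sketch (the gem filename and the unpack step are assumptions, not part of this diff), the new values can be recomputed with Ruby's standard Digest library after extracting the archive:

    # Hedged sketch: recompute the digests listed in checksums.yaml.
    # Assumes the gem archive was unpacked first, e.g. with
    #   tar -xf logstash-input-cloudflare-0.9.6.gem
    # which leaves metadata.gz and data.tar.gz in the current directory.
    require 'digest'

    %w[metadata.gz data.tar.gz].each do |artifact|
      bytes = File.binread(artifact)
      puts "#{artifact} SHA1:   #{Digest::SHA1.hexdigest(bytes)}"
      puts "#{artifact} SHA512: #{Digest::SHA512.hexdigest(bytes)}"
    end

A mismatch between the recomputed digests and checksums.yaml would indicate a corrupted or altered download.
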
data/CHANGELOG.md
CHANGED
data/lib/logstash/inputs/cloudflare.rb
CHANGED
@@ -71,12 +71,11 @@ class LogStash::Inputs::Cloudflare < LogStash::Inputs::Base
   config :metadata_filepath,
          validate: :string, default: '/tmp/cf_logstash_metadata.json', required: false
   config :poll_time, validate: :number, default: 15, required: false
+  config :poll_interval, validate: :number, default: 60, required: false
   config :start_from_secs_ago, validate: :number, default: 1200, required: false
   config :batch_size, validate: :number, default: 1000, required: false
   config :fields, validate: :array, default: DEFAULT_FIELDS, required: false

-  public
-
   def register
     @host = Socket.gethostname
   end # def register
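
Note: the only functional addition in this hunk is the poll_interval option (default 60 seconds), which later hunks substitute for the previously hard-coded 120-second query window. A hypothetical, spec-style instantiation is sketched below; the auth_email option name and the idea that these are the only required credentials are assumptions (only auth_key and domain appear elsewhere in this diff), so treat it as an illustration rather than the plugin's documented usage:

    # Hedged sketch only; option names other than poll_time, poll_interval,
    # domain and auth_key are assumptions, and the values are placeholders.
    require 'logstash/inputs/cloudflare'

    input = LogStash::Inputs::Cloudflare.new(
      'domain'        => 'example.com',
      'auth_email'    => 'user@example.com',  # assumed option name
      'auth_key'      => 'cloudflare-api-key',
      'poll_time'     => 15,  # seconds to wait between polls (default 15)
      'poll_interval' => 60   # width of each query window in seconds (new in 0.9.6, default 60)
    )
    input.register
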
@@ -110,9 +109,26 @@ class LogStash::Inputs::Cloudflare < LogStash::Inputs::Base
     end
   end # def write_metadata

-  def
+  def _build_uri(endpoint, params)
     uri = URI("https://api.cloudflare.com/client/v4#{endpoint}")
     uri.query = URI.encode_www_form(params)
+    uri
+  end
+
+  def _process_response(response, multi_line)
+    content = response_body(response)
+    if response.code != '200'
+      raise CloudflareAPIError.new(uri.to_s, response, content),
+            'Error calling Cloudflare API'
+    end
+    @logger.info("Received response from Cloudflare API (status_code: #{response.code})")
+    lines = parse_content(content)
+    return lines if multi_line
+    lines[0]
+  end # def _process_response
+
+  def cloudflare_api_call(endpoint, params, multi_line = false)
+    uri = _build_uri(endpoint, params)
     @logger.info('Sending request to Cloudflare')
     Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
       request = Net::HTTP::Get.new(
@@ -122,15 +138,7 @@ class LogStash::Inputs::Cloudflare < LogStash::Inputs::Base
         'X-Auth-Key' => @auth_key
       )
       response = http.request(request)
-
-      if response.code != '200'
-        raise CloudflareAPIError.new(uri.to_s, response, content),
-              'Error calling Cloudflare API'
-      end
-      @logger.info("Received response from Cloudflare API (status_code: #{response.code})")
-      lines = parse_content(content)
-      return lines if multi_line
-      return lines[0]
+      return _process_response(response, multi_line)
     end
   end # def cloudflare_api_call

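
Note: cloudflare_api_call is now composed of two helpers, _build_uri (URI assembly) and _process_response (status check plus parsing). The following standalone sketch, using only the Ruby standard library and invented parameter values, shows the kind of URL _build_uri assembles:

    # Hedged illustration of what _build_uri assembles; values are invented.
    require 'uri'

    endpoint = '/zones/zoneid/logs/requests'
    params   = { 'start' => 1_460_000_000, 'end' => 1_460_000_060, 'count' => 1000 }

    uri = URI("https://api.cloudflare.com/client/v4#{endpoint}")
    uri.query = URI.encode_www_form(params)
    puts uri
    # => https://api.cloudflare.com/client/v4/zones/zoneid/logs/requests?start=1460000000&end=1460000060&count=1000
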
@@ -143,38 +151,53 @@ class LogStash::Inputs::Cloudflare < LogStash::Inputs::Base
     raise "No zone with domain #{domain} found"
   end # def cloudflare_zone_id

-  def
+  def _from_ray_id(metadata, params)
+    # We have the previous ray ID so we use that and set the batch_size
+    # in order to fetch a certain number of events
+    @logger.info("Previous ray_id detected: #{metadata['last_ray_id']}")
+    params['start_id'] = metadata['last_ray_id']
+    params['count'] = @batch_size
+    metadata['first_ray_id'] = metadata['last_ray_id']
+    metadata['first_timestamp'] = nil
+  end # def _from_ray_id
+
+  def _from_timestamp(metadata, params)
+    # We have the last timestamp so we use it and use the poll_interval
+    dt_tstamp = DateTime.strptime(metadata['last_timestamp'], '%s')
+    @logger.info('last_timestamp from previous run detected: '\
+                 "#{metadata['last_timestamp']} #{dt_tstamp}")
+    params['start'] = metadata['last_timestamp'].to_i
+    params['end'] = params['start'] + @poll_interval
+    metadata['first_ray_id'] = nil
+    metadata['first_timestamp'] = params['start']
+  end # def _from_timestamp
+
+  def _from_neither(metadata, params)
+    @logger.info('last_timestamp or last_ray_id from previous run NOT set')
+    params['start'] = metadata['default_start_time']
+    params['end'] = params['start'] + @poll_interval
+    metadata['first_ray_id'] = nil
+    metadata['first_timestamp'] = params['start']
+  end # def _from_neither
+
+  def cloudflare_params(metadata)
     params = {}
     # if we have ray_id, we use that as a starting point
     if metadata['last_ray_id']
-
-      params['start_id'] = metadata['last_ray_id']
-      params['count'] = @batch_size
-      metadata['first_ray_id'] = metadata['last_ray_id']
-      metadata['first_timestamp'] = nil
+      _from_ray_id(metadata, params)
     elsif metadata['last_timestamp']
-
-      @logger.info('last_timestamp from previous run detected: '\
-                   "#{metadata['last_timestamp']} #{dt_tstamp}")
-      params['start'] = metadata['last_timestamp'].to_i
-      params['end'] = params['start'] + 120
-      metadata['first_ray_id'] = nil
-      metadata['first_timestamp'] = params['start']
+      _from_timestamp(metadata, params)
     else
-
-      params['start'] = metadata['default_start_time']
-      params['end'] = params['start'] + 120
-      metadata['first_ray_id'] = nil
-      metadata['first_timestamp'] = params['start']
+      _from_neither(metadata, params)
     end
     metadata['last_timestamp'] = nil
     metadata['last_ray_id'] = nil
     params
-  end # def
+  end # def cloudflare_params

   def cloudflare_data(zone_id, metadata)
     @logger.info("cloudflare_data metadata: '#{metadata}'")
-    params =
+    params = cloudflare_params(metadata)
     @logger.info("Using params #{params}")
     begin
       entries = cloudflare_api_call("/zones/#{zone_id}/logs/requests",
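
Note: the three new _from_* helpers replace the branches of the old cloudflare_params and use the configurable @poll_interval instead of the hard-coded 120 seconds. A standalone sketch of the _from_timestamp window arithmetic, with an invented last_timestamp and the default interval:

    # Hedged, standalone version of the _from_timestamp window calculation;
    # the last_timestamp value is made up and poll_interval mirrors the default.
    require 'date'

    poll_interval = 60
    metadata = { 'last_timestamp' => '1460000000' }

    params = {}
    params['start'] = metadata['last_timestamp'].to_i
    params['end']   = params['start'] + poll_interval
    dt_tstamp = DateTime.strptime(metadata['last_timestamp'], '%s')

    puts "querying #{params['start']}..#{params['end']} (window starts at #{dt_tstamp})"
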
@@ -201,50 +224,74 @@ class LogStash::Inputs::Cloudflare < LogStash::Inputs::Base
     end
   end # def fill_cloudflare_data

+  def process_entry(queue, metadata, entry)
+    # skip the first ray_id because we already processed it
+    # in the last run
+    return if metadata['first_ray_id'] && \
+              entry['rayId'] == metadata['first_ray_id']
+    event = LogStash::Event.new('host' => @host)
+    fill_cloudflare_data(event, entry)
+    decorate(event)
+    queue << event
+    metadata['last_ray_id'] = entry['rayId']
+    # Cloudflare provides the timestamp in nanoseconds
+    metadata['last_timestamp'] = entry['timestamp'] / 1_000_000_000
+  end # def process_entry
+
+  def _sleep_time
+    @logger.info("Waiting #{@poll_time} seconds before requesting data"\
+                 'from Cloudflare again')
+    # We're staggering the poll_time so we don't block the worker for the whole 15s
+    (@poll_time * 2).times do
+      sleep(0.5)
+    end
+  end
+
+  def continue_or_sleep(metadata)
+    mod_tstamp = metadata['first_timestamp'].to_i + @poll_interval if metadata['first_timestamp']
+    if !metadata['last_timestamp'] && metadata['first_timestamp'] && \
+       mod_tstamp <= metadata['default_start_time']
+      # we need to increment the timestamp by 2 minutes as we haven't
+      # received any results in the last batch ... also make sure we
+      # only do this if the end date is more than 10 minutes from the
+      # current time
+      @logger.info("Incrementing start timestamp by #{@poll_interval} seconds")
+      metadata['last_timestamp'] = mod_tstamp
+    elsif metadata['last_timestamp'] < metadata['default_start_time']
+      # we won't need to sleep as we're trying to catch up
+      return
+    else
+      _sleep_time
+    end
+  end # def continue_or_sleep
+
+  def loop_worker(queue, zone_id)
+    metadata = read_metadata
+    entries = cloudflare_data(zone_id, metadata)
+    @logger.info("Received #{entries.length} events")
+    # if we only fetch one entry the odds are it's the one event that we asked for
+    if entries.length <= 1
+      @logger.info(
+        'Need more than 1 event to process all entries (usually because the 1 event contains the '\
+        'ray_id you asked for')
+      _sleep_time
+      return
+    end
+    entries.each do |entry|
+      process_entry(queue, metadata, entry)
+    end
+    @logger.info(metadata)
+    continue_or_sleep(metadata)
+    write_metadata(metadata)
+  end # def loop_worker
+
   def run(queue)
     @logger.info('Starting cloudflare run')
     zone_id = cloudflare_zone_id(@domain)
     @logger.info("Resolved zone ID #{zone_id} for domain #{@domain}")
     until stop?
       begin
-
-        entries = cloudflare_data(zone_id, metadata)
-        @logger.info("Received #{entries.length} events")
-        entries.each do |entry|
-          # skip the first ray_id because we already processed it
-          # in the last run
-          next if metadata['first_ray_id'] && \
-                  entry['rayId'] == metadata['first_ray_id']
-          event = LogStash::Event.new('host' => @host)
-          fill_cloudflare_data(event, entry)
-          decorate(event)
-          queue << event
-          metadata['last_ray_id'] = entry['rayId']
-          # Cloudflare provides the timestamp in nanoseconds
-          metadata['last_timestamp'] = entry['timestamp'] / 1_000_000_000
-        end
-        @logger.info(metadata)
-        if metadata['first_timestamp']
-          mod_tstamp = metadata['first_timestamp'].to_i + 120
-        else
-          mod_tstamp = nil
-        end
-        if !metadata['last_timestamp'] && metadata['first_timestamp'] && \
-           mod_tstamp <= metadata['default_start_time']
-          # we need to increment the timestamp by 2 minutes as we haven't
-          # received any results in the last batch ... also make sure we
-          # only do this if the end date is more than 10 minutes from the
-          # current time
-          @logger.info('Incrementing start timestamp by 120 seconds')
-          metadata['last_timestamp'] = mod_tstamp
-        else # if
-          @logger.info("Waiting #{@poll_time} seconds before requesting data"\
-                       'from Cloudflare again')
-          (@poll_time * 2).times do
-            sleep(0.5)
-          end
-        end
-        write_metadata(metadata)
+        loop_worker(queue, zone_id)
       rescue => exception
         break if stop?
         @logger.error(exception.class)
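
Note: loop_worker now carries what used to be the body of run's until loop: read the stored metadata, fetch one window of entries, feed each entry to process_entry, then let continue_or_sleep decide whether to advance the window or sleep. One detail preserved from the old code is the timestamp conversion in process_entry; Cloudflare reports request timestamps in nanoseconds and the plugin stores seconds, as the small sketch below illustrates with an invented value:

    # Hedged illustration of the nanosecond-to-second conversion in process_entry;
    # the sample timestamp is invented.
    timestamp_ns = 1_460_000_000_123_456_789
    timestamp_s  = timestamp_ns / 1_000_000_000   # integer division drops sub-second precision
    puts timestamp_s              # => 1460000000
    puts Time.at(timestamp_s).utc # => 2016-04-07 03:33:20 UTC
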
data/spec/inputs/cloudflare_spec.rb
CHANGED
@@ -27,7 +27,9 @@ RSpec.configure do |config|
     stub_request(:get, 'https://api.cloudflare.com/client/v4/zones?status=active')
       .with(headers: HEADERS)
      .to_return(status: 200, body: ZONE_LIST_RESPONSE.to_json, headers: {})
-    stub_request(
+    stub_request(
+      :get,
+      %r{/api.cloudflare.com\/client\/v4\/zones\/zoneid\/logs\/requests.*/})
       .with(headers: HEADERS)
       .to_return(status: 200, body: LOGS_RESPONSE.to_json, headers: {})
   end