logstash-input-dynamodb 1.0.0-java
- checksums.yaml +7 -0
- data/Gemfile +2 -0
- data/LICENSE.txt +201 -0
- data/NOTICE.txt +14 -0
- data/README.md +177 -0
- data/Rakefile +7 -0
- data/lib/logstash-input-dynamodb_jars.rb +73 -0
- data/lib/logstash/inputs/DynamoDBLogParser.rb +166 -0
- data/lib/logstash/inputs/LogStashRecordProcessor.rb +68 -0
- data/lib/logstash/inputs/LogStashRecordProcessorFactory.rb +43 -0
- data/lib/logstash/inputs/dynamodb.rb +341 -0
- data/logstash-input-dynamodb.gemspec +38 -0
- data/spec/inputs/dynamodb_spec.rb +198 -0
- data/spec/log_parser_spec.rb +63 -0
- data/spec/record_processor_and_factory_spec.rb +70 -0
- data/spec/spec_helper.rb +134 -0
- metadata +162 -0
data/lib/logstash/inputs/DynamoDBLogParser.rb
@@ -0,0 +1,166 @@
# encoding: utf-8
#
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require 'java'
require 'json'
require 'bigdecimal'
require 'activesupport/json_encoder'
require 'base64'

require "logstash-input-dynamodb_jars"
java_import "com.fasterxml.jackson.databind.ObjectMapper"
java_import "com.fasterxml.jackson.annotation.JsonInclude"
java_import "com.amazonaws.services.dynamodbv2.model.AttributeValue"
java_import "com.amazonaws.dynamodb.bootstrap.AttributeValueMixIn"

module Logstash
  module Inputs
    module DynamoDB
      class DynamoDBLogParser

        MAX_NUMBER_OF_BYTES_FOR_NUMBER = 21

        def initialize(view_type, log_format, key_schema, region)
          @view_type = view_type
          @log_format = log_format
          @mapper ||= ObjectMapper.new()
          @mapper.setSerializationInclusion(JsonInclude::Include::NON_NULL)
          @mapper.addMixInAnnotations(AttributeValue, AttributeValueMixIn)
          @key_schema = key_schema
          ActiveSupport.encode_big_decimal_as_string = false
          @hash_template = Hash.new
          @hash_template["eventID"] = "0"
          @hash_template["eventName"] = "INSERT"
          @hash_template["eventVersion"] = "1.0"
          @hash_template["eventSource"] = "aws:dynamodb"
          @hash_template["awsRegion"] = region
        end

        public
        def parse_scan(log, new_image_size)
          data_hash = JSON.parse(@mapper.writeValueAsString(log))

          @hash_template["dynamodb"] = Hash.new
          @hash_template["dynamodb"]["keys"] = Hash.new
          size_bytes = calculate_key_size_in_bytes(log)
          @key_schema.each { |x|
            @hash_template["dynamodb"]["keys"][x] = data_hash[x]
          }
          unless @view_type == "keys_only"
            size_bytes += new_image_size
            @hash_template["dynamodb"]["newImage"] = data_hash
          end
          @hash_template["dynamodb"]["sequenceNumber"] = "0"
          @hash_template["dynamodb"]["sizeBytes"] = size_bytes
          @hash_template["dynamodb"]["streamViewType"] = @view_type.upcase

          return parse_view_type(@hash_template)
        end

        public
        def parse_stream(log)
          return parse_view_type(JSON.parse(@mapper.writeValueAsString(log))["internalObject"])
        end

        private
        def calculate_key_size_in_bytes(record)
          key_size = 0
          @key_schema.each { |x|
            key_size += x.length
            value = record.get(x)
            if !(value.getB().nil?)
              b = value.getB()
              key_size += Base64.decode64(b).length
            elsif !(value.getS().nil?)
              s = value.getS()
              key_size += s.length
            elsif !(value.getN().nil?)
              key_size += MAX_NUMBER_OF_BYTES_FOR_NUMBER
            end
          }
          return key_size
        end

        private
        def parse_view_type(hash)
          if @log_format == LogStash::Inputs::DynamoDB::LF_PLAIN
            return hash.to_json
          end
          case @view_type
          when LogStash::Inputs::DynamoDB::VT_KEYS_ONLY
            return parse_format(hash["dynamodb"]["keys"])
          when LogStash::Inputs::DynamoDB::VT_OLD_IMAGE
            return parse_format(hash["dynamodb"]["oldImage"])
          when LogStash::Inputs::DynamoDB::VT_NEW_IMAGE
            return parse_format(hash["dynamodb"]["newImage"])
          end
        end

        private
        def parse_format(hash)
          if @log_format == LogStash::Inputs::DynamoDB::LF_DYNAMODB
            return hash.to_json
          else
            return dynamodb_to_json(hash)
          end
        end

        private
        def dynamodb_to_json(hash)
          return formatAttributeValueMap(hash).to_json
        end

        private
        def formatAttributeValueMap(hash)
          keys_to_delete = []
          hash.each do |k, v|
            dynamodb_key = v.keys.first
            dynamodb_value = v.values.first
            if @log_format == LogStash::Inputs::DynamoDB::LF_JSON_NO_BIN and (dynamodb_key == "BS" or dynamodb_key == "B")
              keys_to_delete.push(k) # drop binary values and binary sets
              next
            end
            hash[k] = formatAttributeValue(dynamodb_key, dynamodb_value)
          end
          keys_to_delete.each { |key| hash.delete(key) }
          return hash
        end

        private
        def formatAttributeValue(key, value)
          case key
          when "M"
            formatAttributeValueMap(value)
          when "L"
            value.map! { |v| formatAttributeValue(v.keys.first, v.values.first) }
          when "NS", "SS", "BS"
            value.map! { |v| formatAttributeValue(key[0], v) }
          when "N"
            BigDecimal.new(value)
          when "NULL"
            nil
          else
            value
          end
        end

      end
    end
  end
end
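
The type unwrapping above can be followed without the Java dependencies. Below is a minimal standalone sketch (not part of the gem) of the rules formatAttributeValue applies, using plain Ruby hashes in place of the Jackson-deserialized attribute maps; the sample item is hypothetical.

require 'bigdecimal'

# Same dispatch as formatAttributeValue: "M" and "L" recurse, the typed sets
# "NS"/"SS"/"BS" recurse element-wise on their scalar type (key[0]), "N"
# becomes a BigDecimal, "NULL" becomes nil, and everything else passes through.
def unwrap(key, value)
  case key
  when "M"
    value.each { |k, v| value[k] = unwrap(v.keys.first, v.values.first) }
  when "L"
    value.map! { |v| unwrap(v.keys.first, v.values.first) }
  when "NS", "SS", "BS"
    value.map! { |v| unwrap(key[0], v) }
  when "N"
    BigDecimal(value)
  when "NULL"
    nil
  else
    value
  end
end

item = { "Id" => { "N" => "101" }, "Tags" => { "SS" => ["a", "b"] } }
item.each { |k, v| item[k] = unwrap(v.keys.first, v.values.first) }
# "Id" is now a BigDecimal, "Tags" a plain array of strings.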
data/lib/logstash/inputs/LogStashRecordProcessor.rb
@@ -0,0 +1,68 @@
# encoding: utf-8
#
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require "java"

require "logstash-input-dynamodb_jars"
java_import "com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason"
java_import "java.lang.IllegalStateException"
java_import "org.apache.log4j.LogManager"

module Logstash
  module Inputs
    module DynamoDB
      class LogStashRecordProcessor
        include com.amazonaws.services.kinesis.clientlibrary.interfaces::IRecordProcessor

        attr_accessor :queue, :shard_id

        def initialize(queue)
          # Workaround for IRecordProcessor.initialize(String shardId) interfering with the
          # Ruby constructor. There is no good way to overload methods in JRuby, so the
          # argument type decides which of the two calls this is.
          if (queue.is_a? String)
            @shard_id = queue
            return
          else
            @queue ||= queue
            @logger ||= LogStash::Inputs::DynamoDB.logger
          end
        end

        def process_records(records, checkpointer)
          @logger.debug("Processing batch of " + records.size().to_s + " records")
          records.each do |record|
            @queue.push(record)
          end
          # Checkpoint once all of the records have been consumed.
          checkpointer.checkpoint()
        end

        def shutdown(checkpointer, reason)
          case reason
          when ShutdownReason::TERMINATE
            checkpointer.checkpoint()
          when ShutdownReason::ZOMBIE
          else
            raise RuntimeError, "Invalid shutdown reason."
          end
          unless @shard_id.nil?
            @logger.info("shutting down record processor with shardId: " + @shard_id + " with reason " + reason.to_s)
          end
        end
      end
    end
  end
end
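
The constructor workaround is easier to see in isolation. A minimal sketch (not from the gem; it assumes JRuby with the gem loaded, and the shard id is hypothetical) of how the same #initialize serves both as the Ruby constructor and as the KCL lifecycle callback:

require "thread"

queue = SizedQueue.new(20)
processor = Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(queue)
# Constructor path: the argument is a queue, so @queue is set.

# When the KCL worker later invokes IRecordProcessor#initialize(shardId), JRuby
# routes the call to the same Ruby method; the String argument selects the other
# branch, which records the shard id and leaves @queue untouched.
processor.send(:initialize, "shardId-000000000001")
processor.shard_id  # => "shardId-000000000001"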
data/lib/logstash/inputs/LogStashRecordProcessorFactory.rb
@@ -0,0 +1,43 @@
# encoding: utf-8
#
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require 'java'
require_relative "LogStashRecordProcessor"

require "logstash-input-dynamodb_jars"

module KCL
  include_package "com.amazonaws.services.kinesis.clientlibrary.interfaces"
end

module Logstash
  module Inputs
    module DynamoDB
      class LogStashRecordProcessorFactory
        include KCL::IRecordProcessorFactory

        def initialize(queue)
          @queue ||= queue
        end

        def create_processor
          return Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(@queue)
        end

      end
    end
  end
end
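
Every processor this factory creates shares the one queue handed to the factory, which is how records from all stream shards funnel into a single consumer. A minimal sketch (not from the gem; it assumes JRuby with the gem loaded, and the queue size is hypothetical):

require "thread"

queue = SizedQueue.new(20)
factory = Logstash::Inputs::DynamoDB::LogStashRecordProcessorFactory.new(queue)

# The KCL worker calls create_processor once per shard lease; each processor
# pushes its records into the same shared queue.
a = factory.create_processor
b = factory.create_processor
a.queue.equal?(b.queue)  # => true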
data/lib/logstash/inputs/dynamodb.rb
@@ -0,0 +1,341 @@
# encoding: utf-8
#
# Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
require "logstash/inputs/base"
require "logstash/namespace"
require "securerandom"
require "thread"
require "socket"
require_relative "LogStashRecordProcessorFactory"
require_relative "DynamoDBLogParser"

require "logstash-input-dynamodb_jars"

require 'java'
java_import "com.amazonaws.AmazonClientException"
java_import "org.apache.log4j.LogManager"
java_import "org.apache.log4j.Level"
java_import "com.fasterxml.jackson.annotation.JsonInclude"
java_import "com.amazonaws.regions.RegionUtils"

module AmazonDynamoDB
  include_package "com.amazonaws"
  include_package "com.amazonaws.services.dynamodbv2"
  include_package "com.amazonaws.services.dynamodbv2.streamsadapter"
  include_package "com.amazonaws.services.dynamodbv2.model"
end

module AmazonCredentials
  include_package "com.amazonaws.auth"
  include_package "com.amazonaws.internal"
end

module DynamoDBBootstrap
  include_package "com.amazonaws.dynamodb.bootstrap"
end

module CloudWatch
  include_package "com.amazonaws.services.cloudwatch"
end

module KCL
  include_package "com.amazonaws.services.kinesis.clientlibrary.lib.worker"
end

# DynamoDB Streams plugin that first scans the DynamoDB table
# and then consumes streams, pushing those records into Logstash.
class LogStash::Inputs::DynamoDB < LogStash::Inputs::Base
  config_name "dynamodb"

  USER_AGENT = " logstash-input-dynamodb/1.0.0".freeze

  LF_DYNAMODB = "dynamodb".freeze
  LF_JSON_NO_BIN = "json_drop_binary".freeze
  LF_PLAIN = "plain".freeze
  LF_JSON_BIN_AS_TEXT = "json_binary_as_text".freeze
  VT_KEYS_ONLY = "keys_only".freeze
  VT_OLD_IMAGE = "old_image".freeze
  VT_NEW_IMAGE = "new_image".freeze
  VT_ALL_IMAGES = "new_and_old_images".freeze

  default :codec, 'json'

  # The name of the table to copy and stream through Logstash.
  config :table_name, :validate => :string, :required => true

  # How much of each scan and stream record to include in the log.
  # keys_only returns the hash and range keys along with their values.
  # new_image returns the keys plus the entire entry after modification.
  # old_image returns the keys plus the entire entry before modification (NOTE: a scan cannot be performed with this option).
  # new_and_old_images returns the keys plus the entry both before and after modification.
  config :view_type, :validate => [VT_KEYS_ONLY, VT_OLD_IMAGE, VT_NEW_IMAGE, VT_ALL_IMAGES], :required => true

  # Endpoint where the table is located. Example: dynamodb.us-east-1.amazonaws.com
  config :endpoint, :validate => :string, :required => true

  # Endpoint from which streams should be read. Example: streams.dynamodb.us-east-1.amazonaws.com
  config :streams_endpoint, :validate => :string

  # AWS credentials access key.
  config :aws_access_key_id, :validate => :string, :default => ""

  # AWS credentials secret access key.
  config :aws_secret_access_key, :validate => :string, :default => ""

  # Whether the plugin should scan the entire table before streaming new records.
  # Streams only carry records less than 24 hours old, so an initial scan is
  # required to capture the entire table.
  config :perform_scan, :validate => :boolean, :default => true

  # A string that uniquely identifies the KCL checkpointer name and CloudWatch metrics name.
  # This is used when one worker leaves a shard so that another worker knows where to start again.
  config :checkpointer, :validate => :string, :default => "logstash_input_dynamodb_cptr"

  # Option to publish metrics to CloudWatch using the checkpointer name.
  config :publish_metrics, :validate => :boolean, :default => false

  # Option to disable automatically streaming new data into Logstash from DynamoDB Streams.
  config :perform_stream, :validate => :boolean, :default => true

  # Number of read operations per second to perform when scanning the specified table.
  config :read_ops, :validate => :number, :default => 1

  # Number of threads to use when scanning the specified table.
  config :number_of_scan_threads, :validate => :number, :default => 1

  # Number of threads writing to the Logstash queue when scanning the table.
  config :number_of_write_threads, :validate => :number, :default => 1

  # Configuration for how the logs are transferred.
  # plain passes the message along without editing it.
  # dynamodb returns just the data selected by view_type, in DynamoDB format.
  # For more information see: docs.aws.amazon.com/amazondynamodb/latest/developerguide/DataFormat.html
  # json_drop_binary returns just the data selected by view_type as JSON, dropping any binary values that were present.
  # json_binary_as_text returns just the data selected by view_type as JSON, including binary values as base64-encoded text.
  config :log_format, :validate => [LF_PLAIN, LF_DYNAMODB, LF_JSON_NO_BIN, LF_JSON_BIN_AS_TEXT], :default => "plain"

  public
  def build_credentials
    if !@aws_access_key_id.to_s.empty? and !@aws_secret_access_key.to_s.empty?
      @logger.info("Using static credentials: " + @aws_access_key_id + ", " + @aws_secret_access_key)
      basic = AmazonCredentials::BasicAWSCredentials.new(@aws_access_key_id, @aws_secret_access_key)
      return AmazonCredentials::StaticCredentialsProvider.new(basic)
    else
      @logger.info("Using default provider chain")
      return AmazonCredentials::DefaultAWSCredentialsProviderChain.new()
    end # if static credentials were supplied
  end # def build_credentials

  public
  def register
    LogStash::Logger.setup_log4j(@logger)

    @host = Socket.gethostname
    @logger.info("Tablename: " + @table_name)
    @queue = SizedQueue.new(20)
    @credentials = build_credentials()
    @logger.info("Checkpointer: " + @checkpointer)

    if @perform_scan and @view_type == VT_OLD_IMAGE
      raise(LogStash::ConfigurationError, "Cannot perform scan with view type: " + @view_type + " configuration")
    end
    if @view_type == VT_ALL_IMAGES and !(@log_format == LF_PLAIN)
      raise(LogStash::ConfigurationError, "Cannot show view_type: " + @view_type + ", with log_format: " + @log_format)
    end

    # Create the DynamoDB client.
    @client_configuration = AmazonDynamoDB::ClientConfiguration.new()
    @client_configuration.setUserAgent(@client_configuration.getUserAgent() + USER_AGENT)
    @dynamodb_client = AmazonDynamoDB::AmazonDynamoDBClient.new(@credentials, @client_configuration)

    @logger.info(@dynamodb_client.to_s)

    @dynamodb_client.setEndpoint(@endpoint)
    @logger.info("DynamoDB endpoint: " + @endpoint)

    @key_schema = Array.new
    @table_description = @dynamodb_client.describeTable(@table_name).getTable()
    key_iterator = @table_description.getKeySchema().iterator()
    while(key_iterator.hasNext())
      @key_schema.push(key_iterator.next().getAttributeName().to_s)
    end
    region = RegionUtils.getRegionByEndpoint(@endpoint)

    @parser ||= Logstash::Inputs::DynamoDB::DynamoDBLogParser.new(@view_type, @log_format, @key_schema, region)

    if @perform_stream
      setup_stream
    end # if @perform_stream
  end # def register

  public
  def run(logstash_queue)
    begin
      run_with_catch(logstash_queue)
    rescue LogStash::ShutdownSignal
      exit_threads
      until @queue.empty?
        @logger.info("Flushing rest of events in logstash queue")
        event = @queue.pop()
        queue_event(@parser.parse_stream(event), logstash_queue, @host)
      end # until @queue.empty?
    end # begin
  end # def run

  # Starts the scan, if requested, and then the KCL app in a background thread.
  private
  def run_with_catch(logstash_queue)
    if @perform_scan
      scan(logstash_queue)
    end # if @perform_scan

    # Once the scan is finished, start the KCL thread to read from streams.
    if @perform_stream
      stream(logstash_queue)
    end # if @perform_stream
  end # def run_with_catch

  private
  def setup_stream
    worker_id = SecureRandom.uuid()
    @logger.info("WorkerId: " + worker_id)

    dynamodb_streams_client = AmazonDynamoDB::AmazonDynamoDBStreamsClient.new(@credentials, @client_configuration)
    adapter = Java::ComAmazonawsServicesDynamodbv2Streamsadapter::AmazonDynamoDBStreamsAdapterClient.new(@credentials)
    if !@streams_endpoint.nil?
      adapter.setEndpoint(@streams_endpoint)
      dynamodb_streams_client.setEndpoint(@streams_endpoint)
      @logger.info("DynamoDB Streams endpoint: " + @streams_endpoint)
    else
      raise(LogStash::ConfigurationError, "Cannot stream without a configured streams endpoint")
    end # if @streams_endpoint

    stream_arn = nil
    begin
      stream_arn = @table_description.getLatestStreamArn()
      stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \
        .withStreamArn(stream_arn)).getStreamDescription()

      stream_status = stream_description.getStreamStatus()

      stream_view_type = stream_description.getStreamViewType().to_s.downcase
      unless (stream_view_type == @view_type or @view_type == VT_KEYS_ONLY or stream_view_type == VT_ALL_IMAGES)
        raise(LogStash::ConfigurationError, "Cannot stream " + @view_type + " when stream is setup for " + stream_view_type)
      end

      while stream_status == "ENABLING"
        @logger.info("Sleeping until stream is enabled")
        sleep(1)
        stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \
          .withStreamArn(stream_arn)).getStreamDescription()
        stream_status = stream_description.getStreamStatus()
      end # while enabling

      if !(stream_status == "ENABLED")
        raise(LogStash::PluginLoadingError, "No streams are enabled")
      end # if not enabled
      @logger.info("Stream Id: " + stream_arn)
    rescue AmazonDynamoDB::ResourceNotFoundException => rnfe
      raise(LogStash::PluginLoadingError, rnfe.message)
    rescue AmazonClientException => ace
      raise(LogStash::ConfigurationError, "AWS credentials invalid or not found in the provider chain\n" + ace.message)
    end # begin

    kcl_config = KCL::KinesisClientLibConfiguration.new(@checkpointer, stream_arn, @credentials, worker_id) \
      .withInitialPositionInStream(KCL::InitialPositionInStream::TRIM_HORIZON)
    cloudwatch_client = nil
    if @publish_metrics
      cloudwatch_client = CloudWatch::AmazonCloudWatchClient.new(@credentials)
    else
      kcl_metrics_logger = LogManager.getLogger("com.amazonaws.services.kinesis.metrics")
      kcl_metrics_logger.setAdditivity(false)
      kcl_metrics_logger.setLevel(Level::OFF)
    end # if @publish_metrics
    @worker = KCL::Worker.new(Logstash::Inputs::DynamoDB::LogStashRecordProcessorFactory.new(@queue), kcl_config, adapter, @dynamodb_client, cloudwatch_client)
  end # def setup_stream

  private
  def scan(logstash_queue)
    @logger.info("Starting scan...")
    @logstash_writer = DynamoDBBootstrap::BlockingQueueConsumer.new(@number_of_write_threads)

    @connector = DynamoDBBootstrap::DynamoDBBootstrapWorker.new(@dynamodb_client, @read_ops, @table_name, @number_of_scan_threads)
    start_table_copy_thread

    scan_queue = @logstash_writer.getQueue()
    while true
      event = scan_queue.take()
      if event.getEntry().nil? and event.getSize() == -1
        break
      end # if sentinel entry marking the end of the scan
      queue_event(@parser.parse_scan(event.getEntry(), event.getSize()), logstash_queue, @host)
    end # while true
  end # def scan

  private
  def stream(logstash_queue)
    @logger.info("Starting stream...")
    start_kcl_thread

    while true
      event = @queue.pop()
      queue_event(@parser.parse_stream(event), logstash_queue, @host)
    end # while true
  end # def stream

  private
  def exit_threads
    unless @dynamodb_scan_thread.nil?
      @dynamodb_scan_thread.exit
    end # unless @dynamodb_scan_thread.nil?

    unless @kcl_thread.nil?
      @kcl_thread.exit
    end # unless @kcl_thread.nil?
  end # def exit_threads

  public
  def queue_event(event, logstash_queue, event_host)
    logstash_event = LogStash::Event.new("message" => event, "host" => event_host)
    decorate(logstash_event)
    logstash_queue << logstash_event
  end # def queue_event

  private
  def start_table_copy_thread
    @dynamodb_scan_thread = Thread.new(@connector, @logstash_writer) {
      begin
        @connector.pipe(@logstash_writer)
      rescue Exception => e
        abort("Scanning the table caused an error.\n" + e.message)
      end # begin
    }
  end # def start_table_copy_thread

  private
  def start_kcl_thread
    @kcl_thread = Thread.new(@worker) {
      begin
        @worker.run()
      rescue Exception => e
        abort("KCL worker encountered an error.\n" + e.message)
      end # begin
    }
  end # def start_kcl_thread

end # class LogStash::Inputs::DynamoDB
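
With all four files in place, the plugin can be driven programmatically: build it from a config hash, register it, then run it against a queue. A minimal sketch (not from the gem; the table name and endpoints are hypothetical, and a real deployment would configure the input through a Logstash pipeline config instead):

require "logstash/inputs/dynamodb"
require "thread"

input = LogStash::Inputs::DynamoDB.new(
  "table_name"       => "my-table",
  "endpoint"         => "dynamodb.us-east-1.amazonaws.com",
  "streams_endpoint" => "streams.dynamodb.us-east-1.amazonaws.com",
  "view_type"        => "new_and_old_images"  # requires the default log_format "plain"
)
input.register   # describes the table, builds the parser, sets up the KCL worker

events = SizedQueue.new(20)
input.run(events)  # blocks: full table scan first, then the stream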