logstash-input-dynamodb 1.0.4-java → 2.0.0-java
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- metadata +18 -33
- data/Gemfile +0 -2
- data/LICENSE.txt +0 -201
- data/NOTICE.txt +0 -14
- data/README.md +0 -177
- data/Rakefile +0 -7
- data/lib/logstash-input-dynamodb_jars.rb +0 -25
- data/lib/logstash/inputs/DynamoDBLogParser.rb +0 -166
- data/lib/logstash/inputs/LogStashRecordProcessor.rb +0 -68
- data/lib/logstash/inputs/LogStashRecordProcessorFactory.rb +0 -43
- data/lib/logstash/inputs/dynamodb.rb +0 -341
- data/logstash-input-dynamodb.gemspec +0 -40
- data/spec/inputs/dynamodb_spec.rb +0 -198
- data/spec/log_parser_spec.rb +0 -63
- data/spec/record_processor_and_factory_spec.rb +0 -70
- data/spec/spec_helper.rb +0 -134
data/lib/logstash-input-dynamodb_jars.rb
DELETED
@@ -1,25 +0,0 @@
-# this is a generated file, to avoid over-writing it just delete this comment
-require 'jar_dependencies'
-
-require_jar( 'com.amazonaws', 'aws-java-sdk-kinesis', '1.10.60' )
-require_jar( 'com.fasterxml.jackson.core', 'jackson-core', '2.5.3' )
-require_jar( 'com.amazonaws', 'aws-java-sdk-core', '1.10.60' )
-require_jar( 'log4j', 'log4j', '1.2.17' )
-require_jar( 'commons-codec', 'commons-codec', '1.9' )
-require_jar( 'com.amazonaws', 'aws-java-sdk-cloudwatch', '1.10.20' )
-require_jar( 'com.amazonaws', 'aws-java-sdk-s3', '1.10.60' )
-require_jar( 'com.beust', 'jcommander', '1.48' )
-require_jar( 'com.amazonaws', 'dynamodb-import-export-tool', '1.0.0' )
-require_jar( 'commons-lang', 'commons-lang', '2.6' )
-require_jar( 'joda-time', 'joda-time', '2.8.1' )
-require_jar( 'commons-logging', 'commons-logging', '1.1.3' )
-require_jar( 'com.amazonaws', 'dynamodb-streams-kinesis-adapter', '1.0.2' )
-require_jar( 'com.fasterxml.jackson.core', 'jackson-databind', '2.5.3' )
-require_jar( 'com.amazonaws', 'amazon-kinesis-client', '1.6.1' )
-require_jar( 'com.fasterxml.jackson.core', 'jackson-annotations', '2.5.0' )
-require_jar( 'com.amazonaws', 'aws-java-sdk-dynamodb', '1.10.60' )
-require_jar( 'com.amazonaws', 'aws-java-sdk-kms', '1.10.60' )
-require_jar( 'org.apache.httpcomponents', 'httpclient', '4.5.2' )
-require_jar( 'org.apache.httpcomponents', 'httpcore', '4.4.4' )
-require_jar( 'com.google.protobuf', 'protobuf-java', '2.6.1' )
-require_jar( 'com.google.guava', 'guava', '15.0' )
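
The file above is generated by the jar-dependencies gem: each require_jar call maps a Maven coordinate (group, artifact, version) to a vendored jar and adds it to the JRuby load path. A minimal sketch of that loading pattern, assuming JRuby with the jar-dependencies gem and the pinned jars installed locally:

require 'jar_dependencies'

# Resolve a pinned Maven coordinate to its locally installed jar;
# raises if the jar cannot be found.
require_jar('com.amazonaws', 'aws-java-sdk-dynamodb', '1.10.60')

# With the jar on the classpath, its classes become importable.
java_import 'com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient'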
data/lib/logstash/inputs/DynamoDBLogParser.rb
DELETED
@@ -1,166 +0,0 @@
-# encoding: utf-8
-#
-#Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-#
-require 'java'
-require 'json'
-require 'bigdecimal'
-require 'activesupport/json_encoder'
-require 'base64'
-
-require "logstash-input-dynamodb_jars"
-java_import "com.fasterxml.jackson.databind.ObjectMapper"
-java_import "com.amazonaws.services.dynamodbv2.model.AttributeValue"
-java_import "com.amazonaws.dynamodb.bootstrap.AttributeValueMixIn"
-
-module Logstash
-  module Inputs
-    module DynamoDB
-      class DynamoDBLogParser
-
-        MAX_NUMBER_OF_BYTES_FOR_NUMBER = 21;
-
-        def initialize(view_type, log_format, key_schema, region)
-          @view_type = view_type
-          @log_format = log_format
-          @mapper ||= ObjectMapper.new()
-          @mapper.setSerializationInclusion(JsonInclude::Include::NON_NULL)
-          @mapper.addMixInAnnotations(AttributeValue, AttributeValueMixIn);
-          @key_schema = key_schema
-          ActiveSupport.encode_big_decimal_as_string = false
-          @hash_template = Hash.new
-          @hash_template["eventID"] = "0"
-          @hash_template["eventName"] = "INSERT"
-          @hash_template["eventVersion"] = "1.0"
-          @hash_template["eventSource"] = "aws:dynamodb"
-          @hash_template["awsRegion"] = region
-        end
-
-        public
-        def parse_scan(log, new_image_size)
-          data_hash = JSON.parse(@mapper.writeValueAsString(log))
-
-          @hash_template["dynamodb"] = Hash.new
-          @hash_template["dynamodb"]["keys"] = Hash.new
-          size_bytes = calculate_key_size_in_bytes(log)
-          @key_schema.each { |x|
-            @hash_template["dynamodb"]["keys"][x] = data_hash[x]
-          }
-          unless @view_type == "keys_only"
-            size_bytes += new_image_size
-            @hash_template["dynamodb"]["newImage"] = data_hash
-          end
-          @hash_template["dynamodb"]["sequenceNumber"] = "0"
-          @hash_template["dynamodb"]["sizeBytes"] = size_bytes
-          @hash_template["dynamodb"]["streamViewType"] = @view_type.upcase
-
-          return parse_view_type(@hash_template)
-        end
-
-        public
-        def parse_stream(log)
-          return parse_view_type(JSON.parse(@mapper.writeValueAsString(log))["internalObject"])
-        end
-
-        private
-        def calculate_key_size_in_bytes(record)
-          key_size = 0
-          @key_schema.each { |x|
-            key_size += x.length
-            value = record.get(x)
-            if !(value.getB().nil?)
-              b = value.getB();
-              key_size += Base64.decode64(b).length
-            elsif !(value.getS().nil?)
-              s = value.getS();
-              key_size += s.length;
-            elsif !(value.getN().nil?)
-              key_size += MAX_NUMBER_OF_BYTES_FOR_NUMBER;
-            end
-          }
-          return key_size
-        end
-
-        private
-        def parse_view_type(hash)
-          if @log_format == LogStash::Inputs::DynamoDB::LF_PLAIN
-            return hash.to_json
-          end
-          case @view_type
-          when LogStash::Inputs::DynamoDB::VT_KEYS_ONLY
-            return parse_format(hash["dynamodb"]["keys"])
-          when LogStash::Inputs::DynamoDB::VT_OLD_IMAGE
-            return parse_format(hash["dynamodb"]["oldImage"])
-          when LogStash::Inputs::DynamoDB::VT_NEW_IMAGE
-            return parse_format(hash["dynamodb"]["newImage"]) #check new and old, dynamodb.
-          end
-        end
-
-        private
-        def parse_format(hash)
-          if @log_format == LogStash::Inputs::DynamoDB::LF_DYNAMODB
-            return hash.to_json
-          else
-            return dynamodb_to_json(hash)
-          end
-        end
-
-        private
-        def dynamodb_to_json(hash)
-          return formatAttributeValueMap(hash).to_json
-        end
-
-        private
-        def formatAttributeValueMap(hash)
-          keys_to_delete = []
-          hash.each do |k, v|
-            dynamodb_key = v.keys.first
-            dynamodb_value = v.values.first
-            if @log_format == LogStash::Inputs::DynamoDB::LF_JSON_NO_BIN and (dynamodb_key == "BS" or dynamodb_key == "B")
-              keys_to_delete.push(k) # remove binary values and binary sets
-              next
-            end
-            hash[k] = formatAttributeValue(v.keys.first, v.values.first)
-          end
-          keys_to_delete.each {|key| hash.delete(key)}
-          return hash
-        end
-
-        private
-        def formatAttributeValue(key, value)
-          case key
-          when "M"
-            formatAttributeValueMap(value)
-          when "L"
-            value.map! do |v|
-              v = formatAttributeValue(v.keys.first, v.values.first)
-            end
-          when "NS","SS","BS"
-            value.map! do |v|
-              v = formatAttributeValue(key[0], v)
-            end
-          when "N"
-            BigDecimal.new(value)
-          when "NULL"
-            nil
-          else
-            value
-          end
-        end
-
-      end
-    end
-  end
-end
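
For orientation, parse_scan wraps each scanned item in a synthetic stream-style INSERT envelope so that scan results and stream records look identical downstream. A sketch of that envelope for a hypothetical table keyed by "id" (the item, region, and byte count are invented for illustration):

# Hypothetical scanned item: { "id" => {"S" => "42"}, "price" => {"N" => "9.99"} }
# With view_type "new_image" and log_format "plain", parse_scan emits the JSON of:
{
  "eventID"      => "0",
  "eventName"    => "INSERT",
  "eventVersion" => "1.0",
  "eventSource"  => "aws:dynamodb",
  "awsRegion"    => "us-east-1",
  "dynamodb"     => {
    "keys"           => { "id" => { "S" => "42" } },
    "newImage"       => { "id" => { "S" => "42" }, "price" => { "N" => "9.99" } },
    "sequenceNumber" => "0",
    "sizeBytes"      => 24,   # key-name and key-value bytes plus new_image_size
    "streamViewType" => "NEW_IMAGE"
  }
}

Under the JSON log formats, formatAttributeValue then flattens the typed values: {"S" => "42"} becomes "42", {"N" => "9.99"} becomes a BigDecimal, and binary attributes are either dropped (json_drop_binary) or passed through as base64 text (json_binary_as_text).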
data/lib/logstash/inputs/LogStashRecordProcessor.rb
DELETED
@@ -1,68 +0,0 @@
-# encoding: utf-8
-#
-#Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-#
-require "java"
-
-require "logstash-input-dynamodb_jars"
-java_import "com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason"
-java_import "java.lang.IllegalStateException"
-java_import "org.apache.log4j.LogManager"
-
-module Logstash
-  module Inputs
-    module DynamoDB
-      class LogStashRecordProcessor
-        include com.amazonaws.services.kinesis.clientlibrary.interfaces::IRecordProcessor
-
-        attr_accessor :queue, :shard_id
-
-        def initialize(queue)
-          # Workaround for IRecordProcessor.initialize(String shardId) interfering with constructor.
-          # No good way to overload methods in JRuby, so deciding which was supposed to be called here.
-          if (queue.is_a? String)
-            @shard_id = queue
-            return
-          else
-            @queue ||= queue
-            @logger ||= LogStash::Inputs::DynamoDB.logger
-          end
-        end
-
-        def process_records(records, checkpointer)
-          @logger.debug("Processing batch of " + records.size().to_s + " records")
-          records.each do |record|
-            @queue.push(record)
-          end
-          #checkpoint once all of the records have been consumed
-          checkpointer.checkpoint()
-        end
-
-        def shutdown(checkpointer, reason)
-          case reason
-          when ShutdownReason::TERMINATE
-            checkpointer.checkpoint()
-          when ShutdownReason::ZOMBIE
-          else
-            raise RuntimeError, "Invalid shutdown reason."
-          end
-          unless @shard_id.nil?
-            @logger.info("shutting down record processor with shardId: " + @shard_id + " with reason " + reason.to_s)
-          end
-        end
-      end
-    end
-  end
-end
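
LogStashRecordProcessor implements the KCL's IRecordProcessor contract: the KCL calls initialize with a shard id, hands batches to process_records, and calls shutdown when the lease ends. A minimal sketch of that lifecycle, assuming a Logstash runtime (so LogStash::Inputs::DynamoDB.logger resolves) and using a hypothetical FakeCheckpointer stub in place of the KCL's checkpointer:

# Hypothetical stand-in for the KCL's IRecordProcessorCheckpointer.
class FakeCheckpointer
  attr_reader :checkpoints
  def initialize; @checkpoints = 0; end
  def checkpoint; @checkpoints += 1; end
end

queue = SizedQueue.new(20)
processor = Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(queue)
processor.send(:initialize, "shardId-000000000000")  # emulate the KCL's shard-id call

checkpointer = FakeCheckpointer.new
processor.process_records(["record-1", "record-2"], checkpointer)
queue.size                # => 2: both records pushed for the input loop to drain
checkpointer.checkpoints  # => 1: checkpointed once, after the whole batch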
data/lib/logstash/inputs/LogStashRecordProcessorFactory.rb
DELETED
@@ -1,43 +0,0 @@
-# encoding: utf-8
-#
-#Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-#
-require 'java'
-require_relative "LogStashRecordProcessor"
-
-require "logstash-input-dynamodb_jars"
-
-module KCL
-  include_package "com.amazonaws.services.kinesis.clientlibrary.interfaces"
-end
-
-module Logstash
-  module Inputs
-    module DynamoDB
-      class LogStashRecordProcessorFactory
-        include KCL::IRecordProcessorFactory
-
-        def initialize(queue)
-          @queue ||= queue
-        end
-
-        def create_processor
-          return Logstash::Inputs::DynamoDB::LogStashRecordProcessor.new(@queue)
-        end
-
-      end
-    end
-  end
-end
data/lib/logstash/inputs/dynamodb.rb
DELETED
@@ -1,341 +0,0 @@
-# encoding: utf-8
-#
-#Copyright 2015 Amazon.com, Inc. or its affiliates. All Rights Reserved.
-#
-#Licensed under the Apache License, Version 2.0 (the "License");
-#you may not use this file except in compliance with the License.
-#You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-#
-require "logstash/inputs/base"
-require "logstash/namespace"
-require "securerandom"
-require "thread"
-require "socket"
-require_relative "LogStashRecordProcessorFactory"
-require_relative "DynamoDBLogParser"
-
-require "logstash-input-dynamodb_jars"
-
-require 'java'
-java_import "com.amazonaws.AmazonClientException"
-java_import "org.apache.log4j.LogManager"
-java_import "org.apache.log4j.Level"
-java_import "com.fasterxml.jackson.annotation.JsonInclude"
-java_import "com.amazonaws.regions.RegionUtils"
-
-module AmazonDynamoDB
-  include_package "com.amazonaws"
-  include_package "com.amazonaws.services.dynamodbv2"
-  include_package "com.amazonaws.services.dynamodbv2.streamsadapter"
-  include_package "com.amazonaws.services.dynamodbv2.model"
-end
-module AmazonCredentials
-  include_package "com.amazonaws.auth"
-  include_package "com.amazonaws.internal"
-end
-
-module DynamoDBBootstrap
-  include_package "com.amazonaws.dynamodb.bootstrap"
-end
-
-module CloudWatch
-  include_package "com.amazonaws.services.cloudwatch"
-end
-
-module KCL
-  include_package "com.amazonaws.services.kinesis.clientlibrary.lib.worker"
-end
-
-#DynamoDBStreams plugin that will first scan the DynamoDB table
-#and then consume streams and push those records into Logstash
-class LogStash::Inputs::DynamoDB < LogStash::Inputs::Base
-  config_name "dynamodb"
-
-  USER_AGENT = " logstash-input-dynamodb/1.0.0".freeze
-
-  LF_DYNAMODB = "dymamodb".freeze
-  LF_JSON_NO_BIN = "json_drop_binary".freeze
-  LF_PLAIN = "plain".freeze
-  LF_JSON_BIN_AS_TEXT = "json_binary_as_text".freeze
-  VT_KEYS_ONLY = "keys_only".freeze
-  VT_OLD_IMAGE = "old_image".freeze
-  VT_NEW_IMAGE = "new_image".freeze
-  VT_ALL_IMAGES = "new_and_old_images".freeze
-
-  default :codec, 'json'
-
-  # The name of the table to copy and stream through Logstash
-  config :table_name, :validate => :string, :required => true
-
-  # Configuration for what information from the scan and streams to include in the log.
-  # keys_only will return the hash and range keys along with the values for each entry
-  # new_image will return the entire new entry and keys
-  # old_image will return the entire entry before modification and keys (NOTE: Cannot perform scan when using this option)
-  # new_and_old_images will return the old entry before modification along with the new entry and keys
-  config :view_type, :validate => [VT_KEYS_ONLY, VT_OLD_IMAGE, VT_NEW_IMAGE, VT_ALL_IMAGES], :required => true
-
-  # Endpoint from which the table is located. Example: dynamodb.us-east-1.amazonaws.com
-  config :endpoint, :validate => :string, :required => true
-
-  # Endpoint from which streams should read. Example: streams.dynamodb.us-east-1.amazonaws.com
-  config :streams_endpoint, :validate => :string
-
-  # AWS credentials access key.
-  config :aws_access_key_id, :validate => :string, :default => ""
-
-  # AWS credentials secret access key.
-  config :aws_secret_access_key, :validate => :string, :default => ""
-
-  # A flag to indicate whether or not the plugin should scan the entire table before streaming new records.
-  # Streams will only push records that are less than 24 hours old, so in order to get the entire table
-  # an initial scan must be done.
-  config :perform_scan, :validate => :boolean, :default => true
-
-  # A string that uniquely identifies the KCL checkpointer name and cloudwatch metrics name.
-  # This is used when one worker leaves a shard so that another worker knows where to start again.
-  config :checkpointer, :validate => :string, :default => "logstash_input_dynamodb_cptr"
-
-  # Option to publish metrics to Cloudwatch using the checkpointer name.
-  config :publish_metrics, :validate => :boolean, :default => false
-
-  # Option to not automatically stream new data into logstash from DynamoDB streams.
-  config :perform_stream, :validate => :boolean, :default => true
-
-  # Number of read operations per second to perform when scanning the specified table.
-  config :read_ops, :validate => :number, :default => 1
-
-  # Number of threads to use when scanning the specified table
-  config :number_of_scan_threads, :validate => :number, :default => 1
-
-  # Number of threads to write to the logstash queue when scanning the table
-  config :number_of_write_threads, :validate => :number, :default => 1
-
-  # Configuation for how the logs will be transferred.
-  # plain is simply pass the message along without editing it.
-  # dynamodb will return just the data specified in the view_format in dynamodb format.
-  # For more information see: docs.aws.amazon.com/amazondynamodb/latest/developerguide/DataFormat.html
-  # json_drop_binary will return just the data specified in the view_format in JSON while not including any binary values that were present.
-  # json_binary_as_text will return just the data specified in the view_format in JSON while including binary values as base64-encoded text.
-  config :log_format, :validate => [LF_PLAIN, LF_DYNAMODB, LF_JSON_NO_BIN, LF_JSON_BIN_AS_TEXT], :default => "plain"
-
-  public
-  def build_credentials
-    if !@aws_access_key_id.to_s.empty? and !@aws_secret_access_key.to_s.empty?
-      @logger.info("Using static credentials: " + @aws_access_key_id + ", " + @aws_secret_access_key)
-      basic = AmazonCredentials::BasicAWSCredentials.new(@aws_access_key_id, @aws_secret_access_key)
-      return AmazonCredentials::StaticCredentialsProvider.new(basic)
-    else
-      @logger.info("Using default provider chain")
-      return AmazonCredentials::DefaultAWSCredentialsProviderChain.new()
-    end # if neither aws access keys
-  end # def build_credentials
-
-  public
-  def register
-    LogStash::Logger.setup_log4j(@logger)
-
-    @host = Socket.gethostname
-    @logger.info("Tablename: " + @table_name)
-    @queue = SizedQueue.new(20)
-    @credentials = build_credentials()
-    @logger.info("Checkpointer: " + @checkpointer)
-
-    if @perform_scan and @view_type == VT_OLD_IMAGE
-      raise(LogStash::ConfigurationError, "Cannot perform scan with view type: " + @view_type + " configuration")
-    end
-    if @view_type == VT_ALL_IMAGES and !(@log_format == LF_PLAIN)
-      raise(LogStash::ConfigurationError, "Cannot show view_type: " + @view_type + ", with log_format: " + @log_format)
-    end
-
-    #Create DynamoDB Client
-    @client_configuration = AmazonDynamoDB::ClientConfiguration.new()
-    @client_configuration.setUserAgent(@client_configuration.getUserAgent() + USER_AGENT)
-    @dynamodb_client = AmazonDynamoDB::AmazonDynamoDBClient.new(@credentials, @client_configuration)
-
-    @logger.info(@dynamodb_client.to_s)
-
-    @dynamodb_client.setEndpoint(@endpoint)
-    @logger.info("DynamoDB endpoint: " + @endpoint)
-
-    @key_schema = Array.new
-    @table_description = @dynamodb_client.describeTable(@table_name).getTable()
-    key_iterator = @table_description.getKeySchema().iterator()
-    while(key_iterator.hasNext())
-      @key_schema.push(key_iterator.next().getAttributeName().to_s)
-    end
-    region = RegionUtils.getRegionByEndpoint(@endpoint)
-
-    @parser ||= Logstash::Inputs::DynamoDB::DynamoDBLogParser.new(@view_type, @log_format, @key_schema, region)
-
-    if @perform_stream
-      setup_stream
-    end # unless @perform_stream
-  end # def register
-
-  public
-  def run(logstash_queue)
-    begin
-      run_with_catch(logstash_queue)
-    rescue LogStash::ShutdownSignal
-      exit_threads
-      until @queue.empty?
-        @logger.info("Flushing rest of events in logstash queue")
-        event = @queue.pop()
-        queue_event(@parser.parse_stream(event), logstash_queue, @host)
-      end # until !@queue.empty?
-    end # begin
-  end # def run(logstash_queue)
-
-  # Starts KCL app in a background thread
-  # Starts parallel scan if need be in a background thread
-  private
-  def run_with_catch(logstash_queue)
-    if @perform_scan
-      scan(logstash_queue)
-    end # if @perform_scan
-
-    # Once scan is finished, start kcl thread to read from streams
-    if @perform_stream
-      stream(logstash_queue)
-    end # unless @perform_stream
-  end # def run
-
-  private
-  def setup_stream
-    worker_id = SecureRandom.uuid()
-    @logger.info("WorkerId: " + worker_id)
-
-    dynamodb_streams_client = AmazonDynamoDB::AmazonDynamoDBStreamsClient.new(@credentials, @client_configuration)
-    adapter = Java::ComAmazonawsServicesDynamodbv2Streamsadapter::AmazonDynamoDBStreamsAdapterClient.new(@credentials)
-    if !@streams_endpoint.nil?
-      adapter.setEndpoint(@streams_endpoint)
-      dynamodb_streams_client.setEndpoint(@streams_endpoint)
-      @logger.info("DynamoDB Streams endpoint: " + @streams_endpoint)
-    else
-      raise(LogStash::ConfigurationError, "Cannot stream without a configured streams endpoint")
-    end # if not @streams_endpoint.to_s.empty?
-
-    stream_arn = nil
-    begin
-      stream_arn = @table_description.getLatestStreamArn()
-      stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \
-        .withStreamArn(stream_arn)).getStreamDescription()
-
-      stream_status = stream_description.getStreamStatus()
-
-      stream_view_type = stream_description.getStreamViewType().to_s.downcase
-      unless (stream_view_type == @view_type or @view_type == VT_KEYS_ONLY or stream_view_type == VT_ALL_IMAGES)
-        raise(LogStash::ConfigurationError, "Cannot stream " + @view_type + " when stream is setup for " + stream_view_type)
-      end
-
-      while stream_status == "ENABLING"
-        if(stream_status == "ENABLING")
-          @logger.info("Sleeping until stream is enabled")
-          sleep(1)
-        end # if stream_status == "ENABLING"
-        stream_description = dynamodb_streams_client.describeStream(AmazonDynamoDB::DescribeStreamRequest.new() \
-          .withStreamArn(stream_arn)).getStreamDescription()
-        stream_status = stream_description.getStreamStatus()
-      end # while not active
-
-      if !(stream_status == "ENABLED")
-        raise(LogStash::PluginLoadingError, "No streams are enabled")
-      end # if not active
-      @logger.info("Stream Id: " + stream_arn)
-    rescue AmazonDynamoDB::ResourceNotFoundException => rnfe
-      raise(LogStash::PluginLoadingError, rnfe.message)
-    rescue AmazonClientException => ace
-      raise(LogStash::ConfigurationError, "AWS credentials invalid or not found in the provider chain\n" + ace.message)
-    end # begin
-
-    kcl_config = KCL::KinesisClientLibConfiguration.new(@checkpointer, stream_arn, @credentials, worker_id) \
-      .withInitialPositionInStream(KCL::InitialPositionInStream::TRIM_HORIZON)
-    cloudwatch_client = nil
-    if @publish_metrics
-      cloudwatch_client = CloudWatch::AmazonCloudWatchClient.new(@credentials)
-    else
-      kclMetricsLogger = LogManager.getLogger("com.amazonaws.services.kinesis.metrics")
-      kclMetricsLogger.setAdditivity(false)
-      kclMetricsLogger.setLevel(Level::OFF)
-    end # if @publish_metrics
-    @worker = KCL::Worker.new(Logstash::Inputs::DynamoDB::LogStashRecordProcessorFactory.new(@queue), kcl_config, adapter, @dynamodb_client, cloudwatch_client)
-  end # def setup_stream
-
-  private
-  def scan(logstash_queue)
-    @logger.info("Starting scan...")
-    @logstash_writer = DynamoDBBootstrap::BlockingQueueConsumer.new(@number_of_write_threads)
-
-    @connector = DynamoDBBootstrap::DynamoDBBootstrapWorker.new(@dynamodb_client, @read_ops, @table_name, @number_of_scan_threads)
-    start_table_copy_thread
-
-    scan_queue = @logstash_writer.getQueue()
-    while true
-      event = scan_queue.take()
-      if event.getEntry().nil? and event.getSize() == -1
-        break
-      end # if event.isEmpty()
-      queue_event(@parser.parse_scan(event.getEntry(), event.getSize()), logstash_queue, @host)
-    end # while true
-  end
-
-  private
-  def stream(logstash_queue)
-    @logger.info("Starting stream...")
-    start_kcl_thread
-
-    while true
-      event = @queue.pop()
-      queue_event(@parser.parse_stream(event), logstash_queue, @host)
-    end # while true
-  end
-
-  private
-  def exit_threads
-    unless @dynamodb_scan_thread.nil?
-      @dynamodb_scan_thread.exit
-    end # unless @dynamodb_scan_thread.nil?
-
-    unless @kcl_thread.nil?
-      @kcl_thread.exit
-    end # unless @kcl_thread.nil?
-  end # def exit_threads
-
-  public
-  def queue_event(event, logstash_queue, event_host)
-    logstash_event = LogStash::Event.new("message" => event, "host" => event_host)
-    decorate(logstash_event)
-    logstash_queue << logstash_event
-  end # def queue_event
-
-  private
-  def start_table_copy_thread
-    @dynamodb_scan_thread = Thread.new(@connector, @logstash_writer) {
-      begin
-        @connector.pipe(@logstash_writer)
-      rescue Exception => e
-        abort("Scanning the table caused an error.\n" + e.message)
-      end # begin
-    }
-  end # def start_table_copy_thread()
-
-  private
-  def start_kcl_thread
-    @kcl_thread = Thread.new(@worker) {
-      begin
-        @worker.run()
-      rescue Exception => e
-        abort("KCL worker encountered an error.\n" + e.message)
-      end # begin
-    }
-  end # def start_kcl_thread
-
-end # class Logstash::Inputs::DynamoDB
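
For reference, the options documented in the deleted dynamodb.rb add up to a pipeline definition along these lines. A minimal 1.0.x-era sketch, not taken from the package's docs; the table name and endpoints are placeholders, and credentials fall back to the default provider chain when omitted:

input {
  dynamodb {
    table_name       => "my-table"
    view_type        => "new_image"          # keys_only | old_image | new_image | new_and_old_images
    endpoint         => "dynamodb.us-east-1.amazonaws.com"
    streams_endpoint => "streams.dynamodb.us-east-1.amazonaws.com"
    perform_scan     => true                 # copy the existing table before tailing the stream
    log_format       => "json_drop_binary"   # plain | dymamodb | json_drop_binary | json_binary_as_text
  }
}
output {
  stdout { codec => rubydebug }
}

Note the "dymamodb" literal: LF_DYNAMODB is defined with that spelling in the deleted source, so the validator accepts only the misspelled value for the DynamoDB-native format.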