logstash-input-multirds 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e397a301602ab70c6241f3e12f97f0b748868ca3
4
- data.tar.gz: fb55e7f3bccd472bbef7a8fc3a02b1277bf812d6
3
+ metadata.gz: 7dfd5f53cd59c0c7d8c89f79973a92bacc911b87
4
+ data.tar.gz: 184bb1f97be2b8d5896427d9ac4b5b92a585894b
5
5
  SHA512:
6
- metadata.gz: 3ae26cdbf5cbab8f1a863a7cce692351b7775b1af471360c3df2e808c85a8b9c4828deaff09d4caca909e375094f9999533819b1f9427ecf990668e50ffb5f61
7
- data.tar.gz: bc3d45b3f9fad096079233c7c766e66ff80060b1993dfb2f6f0361f11413404b083d6d6c2a9c496e95bc25debfc067671c783c2c8c02de92fcd6da614e4ea67a
6
+ metadata.gz: ef525b86cb6430c1e35776a1101c22d7f3355d36c95ca79f7dc3a839fd59223c9c025c76411bcfa591ca54a79dd19e18ece8107948de4099b10ad760273dedef
7
+ data.tar.gz: 6ef768857f2d767e864f42d8afadc513fb15d722dd565e891e9c9927893bcbca2c26213d5017be1b3ea79f3e696d7d8d8c43949c127fb560fedb56b7c734f1a9
data/README.md CHANGED
@@ -1,11 +1,34 @@
1
1
  # Logstash Input Multi-RDS
2
2
 
3
- Forked from discourse/logstash-input-rds I needed competing consumer and multi-db support
3
+ Inputs RDS logs because Postgres doesn't support CloudWatch. Forked from discourse/logstash-input-rds because I needed competing-consumer and multi-database support. Uses DynamoDB for distributed locking and marker tracking. The plugin will create the table automatically if you give it permission; otherwise you can create it yourself, using the value of `group_name` as the table name and a string attribute called `id` as the primary key.
4
+ ```
4
5
  input {
5
- rds {
6
- region => "us-west-2"
6
+ multirds {
7
+ region => "us-east-1"
7
8
  instance_name_pattern => ".*"
8
9
  log_file_name_pattern => ".*"
9
10
  group_name => "rds"
11
+
10
12
  }
11
13
  }
14
+ ```
15
+
16
+ ## Configuration
17
+
18
+ * `region`: The AWS region for RDS. The AWS SDK reads this from the usual places, so it is not required here, but if it is not set anywhere the plugin won't run.
19
+ * **required**: false
20
+
21
+ * `instance_name_pattern`: A regex pattern of RDS instances from which logs will be consumed
22
+ * **required**: false
23
+ * **default value**: `.*`
24
+
25
+ * `log_file_name_pattern`: A regex pattern of RDS log files to consume
26
+ * **required**: false
27
+ * **default value**: `.*`
28
+
29
+ * `group_name`: A unique identifier shared by all the Logstash instances that consume the same instance and log file patterns. Used as the lock table name.
30
+ * **required**: true
31
+
32
+ * `client_id`: A unique identifier for a particular instance of logstash in the cluster
33
+ * **required**: false
34
+ * **default value**: `<hostname>:<uuid>`
@@ -3,10 +3,10 @@ require "logstash/inputs/base"
3
3
  require "logstash/namespace"
4
4
  require "stud/interval"
5
5
  require "aws-sdk"
6
- require "logstash/inputs/rds/patch"
6
+ require "logstash/inputs/multirds/patch"
7
7
  require "logstash/plugin_mixins/aws_config"
8
8
  require "time"
9
-
9
+ require "socket"
10
10
  Aws.eager_autoload!
11
11
 
12
12
  class LogStash::Inputs::Multirds < LogStash::Inputs::Base
@@ -16,55 +16,170 @@ class LogStash::Inputs::Multirds < LogStash::Inputs::Base
16
16
  milestone 1
17
17
  default :codec, "plain"
18
18
 
19
- config :instance_name_pattern, :validate => :string, :required => true
20
- config :log_file_name_pattern, :validate => :string, :required => true
19
+ config :instance_name_pattern, :validate => :string, :default => '.*'
20
+ config :log_file_name_pattern, :validate => :string, :default => '.*'
21
21
  config :polling_frequency, :validate => :number, :default => 600
22
22
  config :group_name, :validate => :string, :required => true
23
+ config :client_id, :validate => :string
24
+
25
# Makes sure the DynamoDB table used for locks and markers exists.
#
# If the table is already listed it is used as-is; otherwise it is created
# with a string hash key named 'id' and this method waits for it to report
# the ACTIVE status.
#
# @param db [#list_tables, #create_table, #describe_table] DynamoDB client
# @param table [String] name of the lock table
# @return [Boolean] true when the table is listed or became ACTIVE, false
#   when it did not become ACTIVE in time or any call raised
def ensure_lock_table(db, table)
  # TODO: a table can be listed without being in ACTIVE state yet; the
  # status of an already-listed table is not checked here.
  return true if lock_table_listed?(db, table)

  db.create_table(
    table_name: table,
    key_schema: [
      { attribute_name: 'id', key_type: 'HASH' }
    ],
    attribute_definitions: [
      { attribute_name: 'id', attribute_type: 'S' }
    ],
    provisioned_throughput: {
      read_capacity_units: 10,
      write_capacity_units: 10
    }
  )

  # wait here for the table to be ready, backing off a little more each try
  (1..10).each do |attempt|
    sleep attempt
    status = db.describe_table(table_name: table).to_h[:table][:table_status]
    return true if status == 'ACTIVE'
  end
  false
rescue => e
  @logger.error "logstash-input-multirds ensure_lock_table exception\n #{e}"
  false
end

# Walks every page of list_tables looking for +table+. A single call only
# returns one page of names, so checking the first page alone can miss a
# table that already exists.
#
# @param db [#list_tables] DynamoDB client
# @param table [String] table name to look for
# @return [Boolean] true if any page contains the table name
def lock_table_listed?(db, table)
  start_after = nil
  loop do
    params = start_after ? { exclusive_start_table_name: start_after } : {}
    page = db.list_tables(params).to_h
    return true if page[:table_names].to_a.include?(table)

    start_after = page[:last_evaluated_table_name]
    return false unless start_after
  end
end
66
# Tries to take (or renew) the lock row +id+ for +lock_owner+ using a
# conditional update.
#
# The update succeeds when the row has no owner, is already owned by
# +lock_owner+, or its stored expiry is earlier than the current time. The
# new expiry is written as now + +expire_time+.
#
# @param db [#update_item] DynamoDB client
# @param table [String] lock table name
# @param id [String] key of the lock row
# @param lock_owner [String] identifier of the client requesting the lock
# @param expire_time [Integer] lock lifetime in seconds
# @return [Object, nil] the update_item response when the lock was taken,
#   nil when it is held by someone else or the call failed
def acquire_lock(db, table, id, lock_owner, expire_time: 10)
  now = Time.now.utc.to_i
  db.update_item(
    key: { id: id },
    table_name: table,
    update_expression: 'SET lock_owner = :lock_owner, expires = :expires',
    expression_attribute_values: {
      ':lock_owner' => lock_owner,
      ':expires' => now + expire_time,
      ':now' => now
    },
    return_values: 'UPDATED_NEW',
    # compare the stored expiry against the current time, not against the new
    # expiry, so a lock that is still valid cannot be taken over
    condition_expression: 'attribute_not_exists(lock_owner) OR lock_owner = :lock_owner OR expires < :now'
  )
rescue Aws::DynamoDB::Errors::ConditionalCheckFailedException
  # another client holds a live lock; this is the normal contention path
  @logger.debug "logstash-input-multirds acquire_lock #{id} is held by another client"
  nil
rescue => e
  @logger.error "logstash-input-multirds acquire_lock exception\n #{e}"
  nil
end
85
# Collects the log files of every RDS instance whose identifier matches
# +instance_pattern+, keeping only files whose name matches
# +logfile_pattern+.
#
# Each returned entry is the log file description hash with an extra
# :db_instance_identifier key naming the instance it belongs to. Errors are
# logged, not raised; a failure while listing one instance's files does not
# stop the remaining instances from being scanned.
#
# @param rds [#describe_db_instances, #describe_db_log_files] RDS client
# @param instance_pattern [String] regex source matched against instance identifiers
# @param logfile_pattern [String] regex source matched against log file names
# @return [Array<Hash>] matching log file descriptions (possibly empty)
def get_logfile_list(rds, instance_pattern, logfile_pattern)
  log_files = []
  begin
    # compile once instead of rebuilding the regex for every entry
    instance_re = Regexp.new(instance_pattern)
    logfile_re = Regexp.new(logfile_pattern)

    Array(rds.describe_db_instances.to_h[:db_instances]).each do |db|
      instance = db[:db_instance_identifier]
      next unless instance =~ instance_re

      begin
        logs = rds.describe_db_log_files(db_instance_identifier: instance)
        Array(logs.to_h[:describe_db_log_files]).each do |log|
          next unless log[:log_file_name] =~ logfile_re

          log_files.push(log.merge(db_instance_identifier: instance))
        end
      rescue => e
        @logger.error "logstash-input-multirds get_logfile_list instance: #{instance} logfile_pattern:#{logfile_pattern} exception \n#{e}"
      end
    end
  rescue => e
    @logger.error "logstash-input-multirds get_logfile_list instance_pattern: #{instance_pattern} logfile_pattern:#{logfile_pattern} exception \n#{e}"
  end
  log_files
end
106
# Fetches the tracking record for a log file from the lock table.
#
# The stored item is layered over a default of { 'marker' => '0:0' }, so the
# result always has a 'marker' key even when no item (or no marker) has been
# stored yet.
#
# @param db [#get_item] DynamoDB client
# @param id [String] key of the record
# @param tablename [String] lock table name
# @return [Hash] the stored attributes merged over the default marker
def get_logfile_record(db, id, tablename)
  res = db.get_item(
    key: { id: id },
    table_name: tablename
  )
  # item is nil when the row does not exist; fall back to the defaults alone
  { 'marker' => '0:0' }.merge(res.item || {})
end
116
# Writes a single attribute of a log file's tracking record.
#
# The attribute name is passed through an expression attribute name
# placeholder rather than being interpolated into the update expression, so
# names that collide with DynamoDB reserved words or contain special
# characters are still accepted. +key+ is treated as one attribute name.
#
# @param db [#update_item] DynamoDB client
# @param id [String] key of the record
# @param tablename [String] lock table name
# @param key [String, Symbol] attribute to set
# @param value [Object] value to store
# @return [Object] the update_item response
def set_logfile_record(db, id, tablename, key, value)
  db.update_item(
    key: { id: id },
    table_name: tablename,
    update_expression: 'SET #attr = :v',
    expression_attribute_names: { '#attr' => key.to_s },
    expression_attribute_values: { ':v' => value },
    return_values: 'UPDATED_NEW'
  )
end
130
+
24
131
  def register
25
- # @logger.info "Registering multi-RDS input", :region => @region, :instance => @instance_name, :log_file => @log_file_name
26
- # @database = Aws::RDS::DBInstance.new @instance_name, aws_options_hash
27
- # path = @sincedb_path || File.join(ENV["HOME"], ".sincedb_" + Digest::MD5.hexdigest("#{@instance_name}+#{@log_file_name}"))
28
- # @sincedb = SinceDB::File.new path
29
- @logger.info "Registering multi-rds input", :instance_name_pattern => @instance_name_pattern, :log_file_name_pattern => @log_file_name_pattern, :group_name = @group_name
30
- @db = Aws::DynamoDB::Client.new
31
- @rds = Aws::RDS::Client.new
32
- # TODO: Auto-create dynamodb table here -- should that be a param?
132
+ @client_id = "#{Socket.gethostname}:#{java.util::UUID.randomUUID.to_s}" unless @client_id
133
+ @logger.info "Registering multi-rds input", :instance_name_pattern => @instance_name_pattern, :log_file_name_pattern => @log_file_name_pattern, :group_name => @group_name, :region => @region, :client_id => @client_id
134
+
135
+ @db = Aws::DynamoDB::Client.new aws_options_hash
136
+ @rds = Aws::RDS::Client.new aws_options_hash
137
+
138
+ @ready = ensure_lock_table @db, @group_name
33
139
  end
34
140
 
35
141
  def run(queue)
142
+ if !@ready
143
+ @logger.warn "multi-rds dynamodb lock table not ready, unable to proceed"
144
+ return false
145
+ end
36
146
  @thread = Thread.current
37
147
  Stud.interval(@polling_frequency) do
38
- @logger.debug "finding #{@log_file_name} for #{@instance_name} starting #{@sincedb.read} (#{@sincedb.read.to_i * 1000})"
39
- begin
40
- logfiles = @database.log_files({
41
- filename_contains: @log_file_name,
42
- file_last_written: @sincedb.read.to_i * 1000,
43
- })
44
- logfiles.each do |logfile|
45
- @logger.debug "downloading #{logfile.name} for #{@instance_name}"
46
- more = true
47
- marker = "0"
48
- while more do
49
- response = logfile.download({marker: marker})
50
- response[:log_file_data].lines.each do |line|
148
+ logs = get_logfile_list @rds, @instance_name_pattern, @log_file_name_pattern
149
+
150
+ logs.each do |log|
151
+ id = "#{log[:db_instance_identifier]}:#{log[:log_file_name]}"
152
+ lock = acquire_lock @db, @group_name, id, @client_id
153
+ next unless lock # we won't do anything with the data unless we get a lock on the file
154
+
155
+ rec = get_logfile_record @db, id, @group_name
156
+ next unless rec['marker'].split(':')[1].to_i < log[:size].to_i # No new data in the log file so just continue
157
+
158
+ # start reading log data at the marker
159
+ more = true
160
+ marker = rec[:marker]
161
+ while more do
162
+ rsp = @rds.download_db_log_file_portion(
163
+ db_instance_identifier: log[:db_instance_identifier],
164
+ log_file_name: log[:log_file_name],
165
+ marker: rec[:marker],
166
+ )
167
+ rsp[:log_file_data].lines.each do |line|
51
168
  @codec.decode(line) do |event|
52
169
  decorate event
53
- event.set "rds_instance", @instance_name
54
- event.set "log_file", @log_file_name
170
+ event.set "rds_instance", log[:db_instance_identifier]
171
+ event.set "log_file", log[:log_file_name]
55
172
  queue << event
56
173
  end
57
174
  end
58
- more = response[:additional_data_pending]
59
- marker = response[:marker]
60
- end
61
- @sincedb.write (filename2datetime logfile.name)
175
+ more = rsp[:additional_data_pending]
176
+ marker = rsp[:marker]
62
177
  end
63
- rescue Aws::RDS::Errors::ServiceError
64
- # the next iteration will resume at the same location
65
- @logger.warn "caught AWS service error"
178
+ # set the marker back in the lock table
179
+ set_logfile_record @db, id, @group_name, 'marker', marker
66
180
  end
67
181
  end
182
+
68
183
  end
69
184
 
70
185
  def stop
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'logstash-input-multirds'
3
- s.version = '0.0.1'
3
+ s.version = '0.0.2'
4
4
  s.summary = 'Ingest RDS log files to Logstash with competing consumers and multiple databases'
5
5
 
6
6
  s.authors = ['Robert Labrie']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: logstash-input-multirds
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robert Labrie