logstash-integration-jdbc 5.0.0.alpha1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +8 -0
- data/CONTRIBUTORS +22 -0
- data/Gemfile +11 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +105 -0
- data/docs/filter-jdbc_static.asciidoc +606 -0
- data/docs/filter-jdbc_streaming.asciidoc +317 -0
- data/docs/index.asciidoc +32 -0
- data/docs/input-jdbc.asciidoc +573 -0
- data/lib/logstash/filters/jdbc/basic_database.rb +125 -0
- data/lib/logstash/filters/jdbc/column.rb +39 -0
- data/lib/logstash/filters/jdbc/db_object.rb +101 -0
- data/lib/logstash/filters/jdbc/loader.rb +119 -0
- data/lib/logstash/filters/jdbc/loader_schedule.rb +64 -0
- data/lib/logstash/filters/jdbc/lookup.rb +253 -0
- data/lib/logstash/filters/jdbc/lookup_processor.rb +100 -0
- data/lib/logstash/filters/jdbc/lookup_result.rb +40 -0
- data/lib/logstash/filters/jdbc/read_only_database.rb +57 -0
- data/lib/logstash/filters/jdbc/read_write_database.rb +108 -0
- data/lib/logstash/filters/jdbc/repeating_load_runner.rb +13 -0
- data/lib/logstash/filters/jdbc/single_load_runner.rb +46 -0
- data/lib/logstash/filters/jdbc/validatable.rb +46 -0
- data/lib/logstash/filters/jdbc_static.rb +240 -0
- data/lib/logstash/filters/jdbc_streaming.rb +196 -0
- data/lib/logstash/inputs/jdbc.rb +341 -0
- data/lib/logstash/inputs/tzinfo_jruby_patch.rb +57 -0
- data/lib/logstash/plugin_mixins/jdbc/checked_count_logger.rb +43 -0
- data/lib/logstash/plugin_mixins/jdbc/jdbc.rb +298 -0
- data/lib/logstash/plugin_mixins/jdbc/statement_handler.rb +129 -0
- data/lib/logstash/plugin_mixins/jdbc/value_tracking.rb +140 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/cache_payload.rb +28 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/parameter_handler.rb +64 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/statement_handler.rb +143 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming.rb +100 -0
- data/lib/logstash/plugin_mixins/statement_handler.rb +0 -0
- data/lib/logstash-integration-jdbc_jars.rb +5 -0
- data/logstash-integration-jdbc.gemspec +44 -0
- data/spec/filters/env_helper.rb +10 -0
- data/spec/filters/integration/jdbc_static_spec.rb +154 -0
- data/spec/filters/integration/jdbcstreaming_spec.rb +173 -0
- data/spec/filters/jdbc/column_spec.rb +70 -0
- data/spec/filters/jdbc/db_object_spec.rb +81 -0
- data/spec/filters/jdbc/loader_spec.rb +77 -0
- data/spec/filters/jdbc/lookup_processor_spec.rb +132 -0
- data/spec/filters/jdbc/lookup_spec.rb +253 -0
- data/spec/filters/jdbc/read_only_database_spec.rb +67 -0
- data/spec/filters/jdbc/read_write_database_spec.rb +90 -0
- data/spec/filters/jdbc/repeating_load_runner_spec.rb +24 -0
- data/spec/filters/jdbc/single_load_runner_spec.rb +16 -0
- data/spec/filters/jdbc_static_file_local_spec.rb +83 -0
- data/spec/filters/jdbc_static_spec.rb +162 -0
- data/spec/filters/jdbc_streaming_spec.rb +350 -0
- data/spec/filters/remote_server_helper.rb +24 -0
- data/spec/filters/shared_helpers.rb +34 -0
- data/spec/helpers/WHY-THIS-JAR.txt +4 -0
- data/spec/helpers/derbyrun.jar +0 -0
- data/spec/inputs/integration/integ_spec.rb +78 -0
- data/spec/inputs/jdbc_spec.rb +1431 -0
- data/vendor/jar-dependencies/org/apache/derby/derby/10.14.1.0/derby-10.14.1.0.jar +0 -0
- data/vendor/jar-dependencies/org/apache/derby/derbyclient/10.14.1.0/derbyclient-10.14.1.0.jar +0 -0
- metadata +319 -0
@@ -0,0 +1,196 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/filters/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/plugin_mixins/jdbc_streaming"
|
5
|
+
require "logstash/plugin_mixins/jdbc_streaming/cache_payload"
|
6
|
+
require "logstash/plugin_mixins/jdbc_streaming/statement_handler"
|
7
|
+
require "logstash/plugin_mixins/jdbc_streaming/parameter_handler"
|
8
|
+
require "lru_redux"
|
9
|
+
|
10
|
+
# This filter executes a SQL query and stores the result set in the field
# specified as `target`.
# It will cache the results locally in an LRU cache with expiry
#
# For example you can load a row based on an id from in the event
#
# [source,ruby]
# filter {
#   jdbc_streaming {
#     jdbc_driver_library => "/path/to/mysql-connector-java-5.1.34-bin.jar"
#     jdbc_driver_class => "com.mysql.jdbc.Driver"
#     jdbc_connection_string => "jdbc:mysql://localhost:3306/mydatabase"
#     jdbc_user => "me"
#     jdbc_password => "secret"
#     statement => "select * from WORLD.COUNTRY WHERE Code = :code"
#     parameters => { "code" => "country_code"}
#     target => "country_details"
#   }
# }
#
# Prepared Statement Mode example
#
# [source,ruby]
# filter {
#   jdbc_streaming {
#     jdbc_driver_library => "/path/to/mysql-connector-java-5.1.34-bin.jar"
#     jdbc_driver_class => "com.mysql.jdbc.Driver"
#     jdbc_connection_string => "jdbc:mysql://localhost:3306/mydatabase"
#     jdbc_user => "me"
#     jdbc_password => "secret"
#     statement => "select * from WORLD.COUNTRY WHERE Code = ?"
#     use_prepared_statements => true
#     prepared_statement_name => "get_country_from_code"
#     prepared_statement_bind_values => ["[country_code]"]
#     target => "country_details"
#   }
# }
#
module LogStash module Filters class JdbcStreaming < LogStash::Filters::Base
  include LogStash::PluginMixins::JdbcStreaming

  config_name "jdbc_streaming"

  # Statement to execute.
  # To use parameters, use named parameter syntax, for example "SELECT * FROM MYTABLE WHERE ID = :id"
  config :statement, :validate => :string, :required => true

  # Hash of query parameter, for example `{ "id" => "id_field" }`
  config :parameters, :validate => :hash, :default => {}

  # Define the target field to store the extracted result(s)
  # Field is overwritten if exists
  config :target, :validate => :string, :required => true

  # Define a default object to use when lookup fails to return a matching row.
  # ensure that the key names of this object match the columns from the statement
  config :default_hash, :validate => :hash, :default => {}

  # Append values to the `tags` field if sql error occurred
  config :tag_on_failure, :validate => :array, :default => ["_jdbcstreamingfailure"]

  # Append values to the `tags` field if no record was found and default values were used
  config :tag_on_default_use, :validate => :array, :default => ["_jdbcstreamingdefaultsused"]

  # Enable or disable caching, boolean true or false, defaults to true
  config :use_cache, :validate => :boolean, :default => true

  # The minimum number of seconds any entry should remain in the cache, defaults to 5 seconds
  # A numeric value, you can use decimals for example `{ "cache_expiration" => 0.25 }`
  # If there are transient jdbc errors the cache will store empty results for a given
  # parameter set and bypass the jdbc lookup, this merges the default_hash into the event, until
  # the cache entry expires, then the jdbc lookup will be tried again for the same parameters
  # Conversely, while the cache contains valid results any external problem that would cause
  # jdbc errors, will not be noticed for the cache_expiration period.
  config :cache_expiration, :validate => :number, :default => 5.0

  # The maximum number of cache entries are stored, defaults to 500 entries
  # The least recently used entry will be evicted
  config :cache_size, :validate => :number, :default => 500

  config :use_prepared_statements, :validate => :boolean, :default => false
  config :prepared_statement_name, :validate => :string, :default => ""
  config :prepared_statement_bind_values, :validate => :array, :default => []
  config :prepared_statement_warn_on_constant_usage, :validate => :boolean, :default => true # deprecate in a future major LS release

  # Options hash to pass to Sequel
  config :sequel_opts, :validate => :hash, :default => {}

  attr_reader :prepared_statement_constant_warned # for test verification, remove when warning is deprecated and removed

  # ----------------------------------------
  public

  # Validates the configuration (either prepared-statement settings or plain
  # named parameters), builds the statement handler and opens the JDBC
  # connection. Raises LogStash::ConfigurationError on invalid prepared
  # statement settings.
  def register
    convert_config_options
    if @use_prepared_statements
      validation_errors = validate_prepared_statement_mode
      unless validation_errors.empty?
        raise(LogStash::ConfigurationError, "Prepared Statement Mode validation errors: " + validation_errors.join(", "))
      end
    else
      # symbolise and wrap value in parameter handler
      unless @parameters.values.all?{|v| v.is_a?(PluginMixins::JdbcStreaming::ParameterHandler)}
        @parameters = parameters.inject({}) do |hash,(k,value)|
          hash[k.to_sym] = PluginMixins::JdbcStreaming::ParameterHandler.build_parameter_handler(value)
          hash
        end
      end
    end
    @statement_handler = LogStash::PluginMixins::JdbcStreaming::StatementHandler.build_statement_handler(self)
    prepare_jdbc_connection
  end

  # Looks up the row(s) for this event (through the cache when enabled) and
  # writes the result to `target`. Tags the event on lookup failure and
  # substitutes `default_hash` (with its own tag) when no rows were found.
  def filter(event)
    result = @statement_handler.cache_lookup(@database, event) # should return a CachePayload instance

    if result.failed?
      tag_failure(event)
    end

    if result.empty?
      tag_default(event)
      process_event(event, @default_array)
    else
      process_event(event, result.payload)
    end
  end

  # Disconnects from the database; failures to disconnect are only logged.
  def close
    @database.disconnect
  rescue => e
    logger.warn("Exception caught when attempting to close filter.", :exception => e.message, :backtrace => e.backtrace)
  end

  # ----------------------------------------
  private

  def tag_failure(event)
    @tag_on_failure.each do |tag|
      event.tag(tag)
    end
  end

  def tag_default(event)
    @tag_on_default_use.each do |tag|
      event.tag(tag)
    end
  end

  # Writes a deep-cloned copy of `value` into the target field and marks the
  # event as matched.
  def process_event(event, value)
    # use deep clone here so other filter function don't taint the cached payload by reference
    event.set(@target, ::LogStash::Util.deep_clone(value))
    filter_matched(event)
  end

  def convert_config_options
    # create these object once they will be cloned for every filter call anyway,
    # lets not create a new object for each
    @default_array = [@default_hash]
  end

  # Returns an array of error message strings (empty when valid). As a side
  # effect, wraps raw bind values in parameter handlers and warns once about
  # string constants that may have been intended as field references.
  def validate_prepared_statement_mode
    @prepared_statement_constant_warned = false
    error_messages = []
    if @prepared_statement_name.empty?
      error_messages << "must provide a name for the Prepared Statement, it must be unique for the db session"
    end
    if @statement.count("?") != @prepared_statement_bind_values.size
      # mismatch in number of bind value elements to placeholder characters
      error_messages << "there is a mismatch between the number of statement `?` placeholders and :prepared_statement_bind_values array setting elements"
    end
    unless @prepared_statement_bind_values.all?{|v| v.is_a?(PluginMixins::JdbcStreaming::ParameterHandler)}
      @prepared_statement_bind_values = prepared_statement_bind_values.map do |value|
        ParameterHandler.build_bind_value_handler(value)
      end
    end
    if prepared_statement_warn_on_constant_usage
      warnables = @prepared_statement_bind_values.select {|handler| handler.is_a?(PluginMixins::JdbcStreaming::ConstantParameter) && handler.given_value.is_a?(String)}
      unless warnables.empty?
        @prepared_statement_constant_warned = true
        msg = "When using prepared statements, the following `prepared_statement_bind_values` will be treated as constants, if you intend them to be field references please use the square bracket field reference syntax e.g. '[field]'"
        logger.warn(msg, :constants => warnables)
      end
    end
    error_messages
  end
end end end # class LogStash::Filters::JdbcStreaming
|
@@ -0,0 +1,341 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "logstash/inputs/base"
|
3
|
+
require "logstash/namespace"
|
4
|
+
require "logstash/plugin_mixins/jdbc/jdbc"
|
5
|
+
|
6
|
+
# this require_relative returns early unless the JRuby version is between 9.2.0.0 and 9.2.8.0
|
7
|
+
require_relative "tzinfo_jruby_patch"
|
8
|
+
|
9
|
+
# This plugin was created as a way to ingest data from any database
|
10
|
+
# with a JDBC interface into Logstash. You can periodically schedule ingestion
|
11
|
+
# using a cron syntax (see `schedule` setting) or run the query one time to load
|
12
|
+
# data into Logstash. Each row in the resultset becomes a single event.
|
13
|
+
# Columns in the resultset are converted into fields in the event.
|
14
|
+
#
|
15
|
+
# ==== Drivers
|
16
|
+
#
|
17
|
+
# This plugin does not come packaged with JDBC driver libraries. The desired
|
18
|
+
# jdbc driver library must be explicitly passed in to the plugin using the
|
19
|
+
# `jdbc_driver_library` configuration option.
|
20
|
+
#
|
21
|
+
# ==== Scheduling
|
22
|
+
#
|
23
|
+
# Input from this plugin can be scheduled to run periodically according to a specific
|
24
|
+
# schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler].
|
25
|
+
# The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support ).
|
26
|
+
#
|
27
|
+
# Examples:
|
28
|
+
#
|
29
|
+
# |==========================================================
|
30
|
+
# | `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March.
|
31
|
+
# | `0 * * * *` | will execute on the 0th minute of every hour every day.
|
32
|
+
# | `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day.
|
33
|
+
# |==========================================================
|
34
|
+
#
|
35
|
+
#
|
36
|
+
# Further documentation describing this syntax can be found https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here].
|
37
|
+
#
|
38
|
+
# ==== State
|
39
|
+
#
|
40
|
+
# The plugin will persist the `sql_last_value` parameter in the form of a
|
41
|
+
# metadata file stored in the configured `last_run_metadata_path`. Upon query execution,
|
42
|
+
# this file will be updated with the current value of `sql_last_value`. Next time
|
43
|
+
# the pipeline starts up, this value will be updated by reading from the file. If
|
44
|
+
# `clean_run` is set to true, this value will be ignored and `sql_last_value` will be
|
45
|
+
# set to Jan 1, 1970, or 0 if `use_column_value` is true, as if no query has ever been executed.
|
46
|
+
#
|
47
|
+
# ==== Dealing With Large Result-sets
|
48
|
+
#
|
49
|
+
# Many JDBC drivers use the `fetch_size` parameter to limit how many
|
50
|
+
# results are pre-fetched at a time from the cursor into the client's cache
|
51
|
+
# before retrieving more results from the result-set. This is configured in
|
52
|
+
# this plugin using the `jdbc_fetch_size` configuration option. No fetch size
|
53
|
+
# is set by default in this plugin, so the specific driver's default size will
|
54
|
+
# be used.
|
55
|
+
#
|
56
|
+
# ==== Usage:
|
57
|
+
#
|
58
|
+
# Here is an example of setting up the plugin to fetch data from a MySQL database.
|
59
|
+
# First, we place the appropriate JDBC driver library in our current
|
60
|
+
# path (this can be placed anywhere on your filesystem). In this example, we connect to
|
61
|
+
# the 'mydb' database using the user: 'mysql' and wish to input all rows in the 'songs'
|
62
|
+
# table that match a specific artist. The following examples demonstrates a possible
|
63
|
+
# Logstash configuration for this. The `schedule` option in this example will
|
64
|
+
# instruct the plugin to execute this input statement on the minute, every minute.
|
65
|
+
#
|
66
|
+
# [source,ruby]
|
67
|
+
# ------------------------------------------------------------------------------
|
68
|
+
# input {
|
69
|
+
# jdbc {
|
70
|
+
# jdbc_driver_library => "mysql-connector-java-5.1.36-bin.jar"
|
71
|
+
# jdbc_driver_class => "com.mysql.jdbc.Driver"
|
72
|
+
# jdbc_connection_string => "jdbc:mysql://localhost:3306/mydb"
|
73
|
+
# jdbc_user => "mysql"
|
74
|
+
# parameters => { "favorite_artist" => "Beethoven" }
|
75
|
+
# schedule => "* * * * *"
|
76
|
+
# statement => "SELECT * from songs where artist = :favorite_artist"
|
77
|
+
# }
|
78
|
+
# }
|
79
|
+
# ------------------------------------------------------------------------------
|
80
|
+
#
|
81
|
+
# ==== Configuring SQL statement
|
82
|
+
#
|
83
|
+
# A sql statement is required for this input. This can be passed-in via a
|
84
|
+
# statement option in the form of a string, or read from a file (`statement_filepath`). File
|
85
|
+
# option is typically used when the SQL statement is large or cumbersome to supply in the config.
|
86
|
+
# The file option only supports one SQL statement. The plugin will only accept one of the options.
|
87
|
+
# It cannot read a statement from a file as well as from the `statement` configuration parameter.
|
88
|
+
#
|
89
|
+
# ==== Configuring multiple SQL statements
|
90
|
+
#
|
91
|
+
# Configuring multiple SQL statements is useful when there is a need to query and ingest data
|
92
|
+
# from different database tables or views. It is possible to define separate Logstash
|
93
|
+
# configuration files for each statement or to define multiple statements in a single configuration
|
94
|
+
# file. When using multiple statements in a single Logstash configuration file, each statement
|
95
|
+
# has to be defined as a separate jdbc input (including jdbc driver, connection string and other
|
96
|
+
# required parameters).
|
97
|
+
#
|
98
|
+
# Please note that if any of the statements use the `sql_last_value` parameter (e.g. for
|
99
|
+
# ingesting only data changed since last run), each input should define its own
|
100
|
+
# `last_run_metadata_path` parameter. Failure to do so will result in undesired behaviour, as
|
101
|
+
# all inputs will store their state to the same (default) metadata file, effectively
|
102
|
+
# overwriting each other's `sql_last_value`.
|
103
|
+
#
|
104
|
+
# ==== Predefined Parameters
|
105
|
+
#
|
106
|
+
# Some parameters are built-in and can be used from within your queries.
|
107
|
+
# Here is the list:
|
108
|
+
#
|
109
|
+
# |==========================================================
|
110
|
+
# |sql_last_value | The value used to calculate which rows to query. Before any query is run,
|
111
|
+
# this is set to Thursday, 1 January 1970, or 0 if `use_column_value` is true and
|
112
|
+
# `tracking_column` is set. It is updated accordingly after subsequent queries are run.
|
113
|
+
# |==========================================================
|
114
|
+
#
|
115
|
+
# Example:
|
116
|
+
# [source,ruby]
|
117
|
+
# ---------------------------------------------------------------------------------------------------
|
118
|
+
# input {
|
119
|
+
# jdbc {
|
120
|
+
# statement => "SELECT id, mycolumn1, mycolumn2 FROM my_table WHERE id > :sql_last_value"
|
121
|
+
# use_column_value => true
|
122
|
+
# tracking_column => "id"
|
123
|
+
# # ... other configuration bits
|
124
|
+
# }
|
125
|
+
# }
|
126
|
+
# ---------------------------------------------------------------------------------------------------
|
127
|
+
#
|
128
|
+
module LogStash module Inputs class Jdbc < LogStash::Inputs::Base
  include LogStash::PluginMixins::Jdbc::Jdbc
  config_name "jdbc"

  # If undefined, Logstash will complain, even if codec is unused.
  default :codec, "plain"

  # Statement to execute
  #
  # To use parameters, use named parameter syntax.
  # For example:
  #
  # [source, ruby]
  # -----------------------------------------------
  # "SELECT * FROM MYTABLE WHERE id = :target_id"
  # -----------------------------------------------
  #
  # here, ":target_id" is a named parameter. You can configure named parameters
  # with the `parameters` setting.
  config :statement, :validate => :string

  # Path of file containing statement to execute
  config :statement_filepath, :validate => :path

  # Hash of query parameter, for example `{ "target_id" => "321" }`
  config :parameters, :validate => :hash, :default => {}

  # Schedule of when to periodically run statement, in Cron format
  # for example: "* * * * *" (execute query every minute, on the minute)
  #
  # There is no schedule by default. If no schedule is given, then the statement is run
  # exactly once.
  config :schedule, :validate => :string

  # Path to file with last run time
  config :last_run_metadata_path, :validate => :string, :default => "#{ENV['HOME']}/.logstash_jdbc_last_run"

  # Use an incremental column value rather than a timestamp
  config :use_column_value, :validate => :boolean, :default => false

  # If tracking column value rather than timestamp, the column whose value is to be tracked
  config :tracking_column, :validate => :string

  # Type of tracking column. Currently only "numeric" and "timestamp"
  config :tracking_column_type, :validate => ['numeric', 'timestamp'], :default => 'numeric'

  # Whether the previous run state should be preserved
  config :clean_run, :validate => :boolean, :default => false

  # Whether to save state or not in last_run_metadata_path
  config :record_last_run, :validate => :boolean, :default => true

  # Whether to force the lowercasing of identifier fields
  config :lowercase_column_names, :validate => :boolean, :default => true

  # The character encoding of all columns, leave empty if the columns are already properly UTF-8
  # encoded. Specific columns charsets using :columns_charset can override this setting.
  config :charset, :validate => :string

  # The character encoding for specific columns. This option will override the `:charset` option
  # for the specified columns.
  #
  # Example:
  # [source,ruby]
  # -------------------------------------------------------
  # input {
  #   jdbc {
  #     ...
  #     columns_charset => { "column0" => "ISO-8859-1" }
  #     ...
  #   }
  # }
  # -------------------------------------------------------
  # this will only convert column0 that has ISO-8859-1 as an original encoding.
  config :columns_charset, :validate => :hash, :default => {}

  config :use_prepared_statements, :validate => :boolean, :default => false

  config :prepared_statement_name, :validate => :string, :default => ""

  config :prepared_statement_bind_values, :validate => :array, :default => []

  attr_reader :database # for test mocking/stubbing

  public

  # Validates configuration (tracking column, statement source, prepared
  # statement mode, password source), resolves the SQL statement, sets up the
  # value tracker, statement logger and charset converters, and opens the
  # JDBC connection. Raises LogStash::ConfigurationError on invalid settings.
  def register
    @logger = self.logger
    require "rufus/scheduler"
    prepare_jdbc_connection

    if @use_column_value
      # Raise an error if @use_column_value is true, but no @tracking_column is set
      if @tracking_column.nil?
        raise(LogStash::ConfigurationError, "Must set :tracking_column if :use_column_value is true.")
      end
    end

    # exactly one of :statement / :statement_filepath must be set (XOR)
    unless @statement.nil? ^ @statement_filepath.nil?
      raise(LogStash::ConfigurationError, "Must set either :statement or :statement_filepath. Only one may be set at a time.")
    end

    @statement = ::File.read(@statement_filepath) if @statement_filepath

    # must validate prepared statement mode after trying to read in from @statement_filepath
    if @use_prepared_statements
      validation_errors = validate_prepared_statement_mode
      unless validation_errors.empty?
        raise(LogStash::ConfigurationError, "Prepared Statement Mode validation errors: " + validation_errors.join(", "))
      end
    end

    set_value_tracker(LogStash::PluginMixins::Jdbc::ValueTracking.build_last_value_tracker(self))
    set_statement_logger(LogStash::PluginMixins::Jdbc::CheckedCountLogger.new(@logger))

    @enable_encoding = !@charset.nil? || !@columns_charset.empty?

    if (@jdbc_password_filepath and @jdbc_password)
      raise(LogStash::ConfigurationError, "Only one of :jdbc_password, :jdbc_password_filepath may be set at a time.")
    end

    @jdbc_password = LogStash::Util::Password.new(::File.read(@jdbc_password_filepath).strip) if @jdbc_password_filepath

    if enable_encoding?
      encodings = @columns_charset.values
      encodings << @charset if @charset

      @converters = encodings.each_with_object({}) do |encoding, converters|
        converter = LogStash::Util::Charset.new(encoding)
        converter.logger = self.logger
        converters[encoding] = converter
      end
    end
  end # def register

  # test injection points
  def set_statement_logger(instance)
    @statement_handler = LogStash::PluginMixins::Jdbc::StatementHandler.build_statement_handler(self, instance)
  end

  def set_value_tracker(instance)
    @value_tracker = instance
  end

  # Executes the query once, or repeatedly on the configured cron schedule
  # (single worker thread) until the pipeline is stopped.
  def run(queue)
    if @schedule
      @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
      @scheduler.cron @schedule do
        execute_query(queue)
      end

      @scheduler.join
    else
      execute_query(queue)
    end
  end # def run

  def stop
    close_jdbc_connection
    @scheduler.shutdown(:wait) if @scheduler
  end

  private

  # Returns an array of error message strings; empty when the prepared
  # statement settings are valid.
  def validate_prepared_statement_mode
    error_messages = []
    if @prepared_statement_name.empty?
      error_messages << "must provide a name for the Prepared Statement, it must be unique for the db session"
    end
    if @statement.count("?") != @prepared_statement_bind_values.size
      # mismatch in number of bind value elements to placeholder characters
      error_messages << "there is a mismatch between the number of statement `?` placeholders and :prepared_statement_bind_values array setting elements"
    end
    if @jdbc_paging_enabled
      # Pagination is not supported when using prepared statements
      error_messages << "JDBC pagination cannot be used at this time"
    end
    error_messages
  end

  # Executes the statement, re-encodes string columns when configured,
  # emits one decorated event per row, then persists `sql_last_value`.
  def execute_query(queue)
    execute_statement do |row|
      if enable_encoding?
        ## do the necessary conversions to string elements
        row = Hash[row.map { |k, v| [k.to_s, convert(k, v)] }]
      end
      event = LogStash::Event.new(row)
      decorate(event)
      queue << event
    end
    @value_tracker.write
  end

  def enable_encoding?
    @enable_encoding
  end

  # make sure the encoding is uniform over fields
  def convert(column_name, value)
    return value unless value.is_a?(String)
    column_charset = @columns_charset[column_name]
    if column_charset
      converter = @converters[column_charset]
      converter.convert(value)
    elsif @charset
      converter = @converters[@charset]
      converter.convert(value)
    else
      value
    end
  end
end end end # class LogStash::Inputs::Jdbc
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
# frozen_string_literal: true

# JRuby versions from 9.2.0.0 (inclusive) up to 9.2.8.0 (exclusive) carry a
# bug (https://github.com/jruby/jruby/issues/5791) where Rational values with
# numerators/denominators larger than a Java long cannot be processed.
# TZInfo::Timestamp#new_datetime can build exactly such Rationals, so this
# file reopens that class and redefines `new_datetime` to scale the Rational
# down first. There is no appreciable precision loss at the microsecond level.

fixed_in_jruby = "9.2.8.0"
introduced_in_jruby = "9.2.0.0"

running = Gem::Version.new(JRUBY_VERSION)
first_broken = Gem::Version.new(introduced_in_jruby)
first_fixed = Gem::Version.new(fixed_in_jruby)

# Nothing to patch on unaffected JRuby versions.
return if running < first_broken || running >= first_fixed

require 'tzinfo'

if defined?(TZInfo::VERSION) && TZInfo::VERSION > '2'
  module TZInfo
    # A time represented as an `Integer` number of seconds since 1970-01-01
    # 00:00:00 UTC (ignoring leap seconds), the fraction through the second
    # (sub_second as a `Rational`) and an optional UTC offset. Like Ruby's `Time`
    # class, {Timestamp} can distinguish between a local time with a zero offset
    # and a time specified explicitly as UTC.
    class Timestamp

      protected

      # Builds a DateTime (or the given DateTime subclass) for this timestamp,
      # routing the Julian-day Rational through the scale-down workaround.
      def new_datetime(klass = DateTime)
        julian_day = JD_EPOCH + ((@value.to_r + @sub_second) / 86400)
        datetime = klass.jd(jruby_scale_down_rational(julian_day))
        @utc_offset && @utc_offset != 0 ? datetime.new_offset(Rational(@utc_offset, 86400)) : datetime
      end

      private

      # While the JRuby bug exists in 9.2.x (jruby/jruby#5791) we must divide
      # numerator and denominator by the smallest power of ten (10, 100, 1000)
      # that brings the numerator within Java Long range.
      # NOTE(review): if none of the factors suffices, the `each` call's
      # result (the factor array) is returned instead of a Rational —
      # presumably unreachable for real timestamps; confirm before relying on it.
      def jruby_scale_down_rational(rational)
        return rational if rational.numerator <= java.lang.Long::MAX_VALUE
        [10, 100, 1000].each do |factor|
          scaled_numerator = rational.numerator / factor
          if scaled_numerator <= java.lang.Long::MAX_VALUE
            return Rational(scaled_numerator, rational.denominator / factor)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module LogStash module PluginMixins module Jdbc
  # Wraps a logger so that debug output for a JDBC statement can include a
  # row count taken from the query object's `count` method. The first count
  # attempt doubles as a capability probe: if it raises, counting is
  # disabled for the remainder of the run and only warnings are emitted.
  class CheckedCountLogger
    def initialize(logger)
      @logger = logger
      @needs_check = true
      @count_is_supported = false
      # debug-mode is sampled once at construction time
      @in_debug = @logger.debug?
    end

    # Permanently turn off both the probe and count reporting.
    def disable_count
      @needs_check = false
      @count_is_supported = false
    end

    # Emit a debug line for the statement; includes :count only when the
    # probe has succeeded. No-op unless the logger was in debug mode when
    # this instance was created.
    def log_statement_parameters(statement, parameters, query)
      return unless @in_debug
      check_count_query(query) if @needs_check && query
      details = { :statement => statement, :parameters => parameters }
      details[:count] = execute_count(query) if @count_is_supported
      @logger.debug("Executing JDBC query", details)
    end

    # One-shot probe: try a count and remember whether it is supported.
    def check_count_query(query)
      @needs_check = false
      execute_count(query)
      @count_is_supported = true
    rescue Exception => e
      # NOTE(review): rescues Exception rather than StandardError —
      # presumably to also catch Java-side errors under JRuby; confirm
      # before narrowing this.
      @logger.warn("Attempting a count query raised an error, the generated count statement is most likely incorrect but check networking, authentication or your statement syntax", "exception" => e.message)
      @logger.warn("Ongoing count statement generation is being prevented")
      @count_is_supported = false
    end

    def execute_count(query)
      query.count
    end
  end
end end end
|