logstash-integration-jdbc 5.0.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +8 -0
- data/CONTRIBUTORS +22 -0
- data/Gemfile +11 -0
- data/LICENSE +13 -0
- data/NOTICE.TXT +5 -0
- data/README.md +105 -0
- data/docs/filter-jdbc_static.asciidoc +606 -0
- data/docs/filter-jdbc_streaming.asciidoc +317 -0
- data/docs/index.asciidoc +32 -0
- data/docs/input-jdbc.asciidoc +573 -0
- data/lib/logstash/filters/jdbc/basic_database.rb +125 -0
- data/lib/logstash/filters/jdbc/column.rb +39 -0
- data/lib/logstash/filters/jdbc/db_object.rb +101 -0
- data/lib/logstash/filters/jdbc/loader.rb +119 -0
- data/lib/logstash/filters/jdbc/loader_schedule.rb +64 -0
- data/lib/logstash/filters/jdbc/lookup.rb +253 -0
- data/lib/logstash/filters/jdbc/lookup_processor.rb +100 -0
- data/lib/logstash/filters/jdbc/lookup_result.rb +40 -0
- data/lib/logstash/filters/jdbc/read_only_database.rb +57 -0
- data/lib/logstash/filters/jdbc/read_write_database.rb +108 -0
- data/lib/logstash/filters/jdbc/repeating_load_runner.rb +13 -0
- data/lib/logstash/filters/jdbc/single_load_runner.rb +46 -0
- data/lib/logstash/filters/jdbc/validatable.rb +46 -0
- data/lib/logstash/filters/jdbc_static.rb +240 -0
- data/lib/logstash/filters/jdbc_streaming.rb +196 -0
- data/lib/logstash/inputs/jdbc.rb +341 -0
- data/lib/logstash/inputs/tzinfo_jruby_patch.rb +57 -0
- data/lib/logstash/plugin_mixins/jdbc/checked_count_logger.rb +43 -0
- data/lib/logstash/plugin_mixins/jdbc/jdbc.rb +298 -0
- data/lib/logstash/plugin_mixins/jdbc/statement_handler.rb +129 -0
- data/lib/logstash/plugin_mixins/jdbc/value_tracking.rb +140 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/cache_payload.rb +28 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/parameter_handler.rb +64 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming/statement_handler.rb +143 -0
- data/lib/logstash/plugin_mixins/jdbc_streaming.rb +100 -0
- data/lib/logstash/plugin_mixins/statement_handler.rb +0 -0
- data/lib/logstash-integration-jdbc_jars.rb +5 -0
- data/logstash-integration-jdbc.gemspec +44 -0
- data/spec/filters/env_helper.rb +10 -0
- data/spec/filters/integration/jdbc_static_spec.rb +154 -0
- data/spec/filters/integration/jdbcstreaming_spec.rb +173 -0
- data/spec/filters/jdbc/column_spec.rb +70 -0
- data/spec/filters/jdbc/db_object_spec.rb +81 -0
- data/spec/filters/jdbc/loader_spec.rb +77 -0
- data/spec/filters/jdbc/lookup_processor_spec.rb +132 -0
- data/spec/filters/jdbc/lookup_spec.rb +253 -0
- data/spec/filters/jdbc/read_only_database_spec.rb +67 -0
- data/spec/filters/jdbc/read_write_database_spec.rb +90 -0
- data/spec/filters/jdbc/repeating_load_runner_spec.rb +24 -0
- data/spec/filters/jdbc/single_load_runner_spec.rb +16 -0
- data/spec/filters/jdbc_static_file_local_spec.rb +83 -0
- data/spec/filters/jdbc_static_spec.rb +162 -0
- data/spec/filters/jdbc_streaming_spec.rb +350 -0
- data/spec/filters/remote_server_helper.rb +24 -0
- data/spec/filters/shared_helpers.rb +34 -0
- data/spec/helpers/WHY-THIS-JAR.txt +4 -0
- data/spec/helpers/derbyrun.jar +0 -0
- data/spec/inputs/integration/integ_spec.rb +78 -0
- data/spec/inputs/jdbc_spec.rb +1431 -0
- data/vendor/jar-dependencies/org/apache/derby/derby/10.14.1.0/derby-10.14.1.0.jar +0 -0
- data/vendor/jar-dependencies/org/apache/derby/derbyclient/10.14.1.0/derbyclient-10.14.1.0.jar +0 -0
- metadata +319 -0
data/lib/logstash/filters/jdbc_streaming.rb
@@ -0,0 +1,196 @@
+# encoding: utf-8
+require "logstash/filters/base"
+require "logstash/namespace"
+require "logstash/plugin_mixins/jdbc_streaming"
+require "logstash/plugin_mixins/jdbc_streaming/cache_payload"
+require "logstash/plugin_mixins/jdbc_streaming/statement_handler"
+require "logstash/plugin_mixins/jdbc_streaming/parameter_handler"
+require "lru_redux"
+
+# This filter executes a SQL query and stores the result set in the field
+# specified as `target`.
+# It will cache the results locally in an LRU cache with expiry.
+#
+# For example, you can load a row based on an id in the event.
+#
+# [source,ruby]
+# filter {
+#   jdbc_streaming {
+#     jdbc_driver_library => "/path/to/mysql-connector-java-5.1.34-bin.jar"
+#     jdbc_driver_class => "com.mysql.jdbc.Driver"
+#     jdbc_connection_string => "jdbc:mysql://localhost:3306/mydatabase"
+#     jdbc_user => "me"
+#     jdbc_password => "secret"
+#     statement => "select * from WORLD.COUNTRY WHERE Code = :code"
+#     parameters => { "code" => "country_code" }
+#     target => "country_details"
+#   }
+# }
+#
+# Prepared Statement Mode example
+#
+# [source,ruby]
+# filter {
+#   jdbc_streaming {
+#     jdbc_driver_library => "/path/to/mysql-connector-java-5.1.34-bin.jar"
+#     jdbc_driver_class => "com.mysql.jdbc.Driver"
+#     jdbc_connection_string => "jdbc:mysql://localhost:3306/mydatabase"
+#     jdbc_user => "me"
+#     jdbc_password => "secret"
+#     statement => "select * from WORLD.COUNTRY WHERE Code = ?"
+#     use_prepared_statements => true
+#     prepared_statement_name => "get_country_from_code"
+#     prepared_statement_bind_values => ["[country_code]"]
+#     target => "country_details"
+#   }
+# }
+#
+module LogStash module Filters class JdbcStreaming < LogStash::Filters::Base
+  include LogStash::PluginMixins::JdbcStreaming
+
+  config_name "jdbc_streaming"
+
+  # Statement to execute.
+  # To use parameters, use named parameter syntax, for example "SELECT * FROM MYTABLE WHERE ID = :id"
+  config :statement, :validate => :string, :required => true
+
+  # Hash of query parameters, for example `{ "id" => "id_field" }`
+  config :parameters, :validate => :hash, :default => {}
+
+  # Define the target field to store the extracted result(s).
+  # The field is overwritten if it exists.
+  config :target, :validate => :string, :required => true
+
+  # Define a default object to use when the lookup fails to return a matching row.
+  # Ensure that the key names of this object match the columns from the statement.
+  config :default_hash, :validate => :hash, :default => {}
+
+  # Append values to the `tags` field if a SQL error occurred
+  config :tag_on_failure, :validate => :array, :default => ["_jdbcstreamingfailure"]
+
+  # Append values to the `tags` field if no record was found and default values were used
+  config :tag_on_default_use, :validate => :array, :default => ["_jdbcstreamingdefaultsused"]
+
+  # Enable or disable caching, boolean true or false, defaults to true
+  config :use_cache, :validate => :boolean, :default => true
+
+  # The minimum number of seconds any entry should remain in the cache, defaults to 5 seconds.
+  # A numeric value; you can use decimals, for example `{ "cache_expiration" => 0.25 }`.
+  # If there are transient jdbc errors, the cache will store empty results for a given
+  # parameter set and bypass the jdbc lookup (this merges the default_hash into the event) until
+  # the cache entry expires; then the jdbc lookup will be tried again for the same parameters.
+  # Conversely, while the cache contains valid results, any external problem that would cause
+  # jdbc errors will not be noticed for the cache_expiration period.
+  config :cache_expiration, :validate => :number, :default => 5.0
+
+  # The maximum number of cache entries that are stored, defaults to 500 entries.
+  # The least recently used entry will be evicted.
+  config :cache_size, :validate => :number, :default => 500
+
+  config :use_prepared_statements, :validate => :boolean, :default => false
+  config :prepared_statement_name, :validate => :string, :default => ""
+  config :prepared_statement_bind_values, :validate => :array, :default => []
+  config :prepared_statement_warn_on_constant_usage, :validate => :boolean, :default => true # deprecate in a future major LS release
+
+  # Options hash to pass to Sequel
+  config :sequel_opts, :validate => :hash, :default => {}
+
+  attr_reader :prepared_statement_constant_warned # for test verification, remove when warning is deprecated and removed
+
+  # ----------------------------------------
+  public
+
+  def register
+    convert_config_options
+    if @use_prepared_statements
+      validation_errors = validate_prepared_statement_mode
+      unless validation_errors.empty?
+        raise(LogStash::ConfigurationError, "Prepared Statement Mode validation errors: " + validation_errors.join(", "))
+      end
+    else
+      # symbolise and wrap value in parameter handler
+      unless @parameters.values.all?{|v| v.is_a?(PluginMixins::JdbcStreaming::ParameterHandler)}
+        @parameters = parameters.inject({}) do |hash,(k,value)|
+          hash[k.to_sym] = PluginMixins::JdbcStreaming::ParameterHandler.build_parameter_handler(value)
+          hash
+        end
+      end
+    end
+    @statement_handler = LogStash::PluginMixins::JdbcStreaming::StatementHandler.build_statement_handler(self)
+    prepare_jdbc_connection
+  end
+
+  def filter(event)
+    result = @statement_handler.cache_lookup(@database, event) # should return a CachePayload instance
+
+    if result.failed?
+      tag_failure(event)
+    end
+
+    if result.empty?
+      tag_default(event)
+      process_event(event, @default_array)
+    else
+      process_event(event, result.payload)
+    end
+  end
+
+  def close
+    @database.disconnect
+  rescue => e
+    logger.warn("Exception caught when attempting to close filter.", :exception => e.message, :backtrace => e.backtrace)
+  end
+
+  # ----------------------------------------
+  private
+
+  def tag_failure(event)
+    @tag_on_failure.each do |tag|
+      event.tag(tag)
+    end
+  end
+
+  def tag_default(event)
+    @tag_on_default_use.each do |tag|
+      event.tag(tag)
+    end
+  end
+
+  def process_event(event, value)
+    # use deep clone here so other filter functions don't taint the cached payload by reference
+    event.set(@target, ::LogStash::Util.deep_clone(value))
+    filter_matched(event)
+  end
+
+  def convert_config_options
+    # create this object once; it will be cloned for every filter call anyway,
+    # let's not create a new object for each
+    @default_array = [@default_hash]
+  end
+
+  def validate_prepared_statement_mode
+    @prepared_statement_constant_warned = false
+    error_messages = []
+    if @prepared_statement_name.empty?
+      error_messages << "must provide a name for the Prepared Statement, it must be unique for the db session"
+    end
+    if @statement.count("?") != @prepared_statement_bind_values.size
+      # mismatch in number of bind value elements to placeholder characters
+      error_messages << "there is a mismatch between the number of statement `?` placeholders and :prepared_statement_bind_values array setting elements"
+    end
+    unless @prepared_statement_bind_values.all?{|v| v.is_a?(PluginMixins::JdbcStreaming::ParameterHandler)}
+      @prepared_statement_bind_values = prepared_statement_bind_values.map do |value|
+        ParameterHandler.build_bind_value_handler(value)
+      end
+    end
+    if prepared_statement_warn_on_constant_usage
+      warnables = @prepared_statement_bind_values.select {|handler| handler.is_a?(PluginMixins::JdbcStreaming::ConstantParameter) && handler.given_value.is_a?(String)}
+      unless warnables.empty?
+        @prepared_statement_constant_warned = true
+        msg = "When using prepared statements, the following `prepared_statement_bind_values` will be treated as constants, if you intend them to be field references please use the square bracket field reference syntax e.g. '[field]'"
+        logger.warn(msg, :constants => warnables)
+      end
+    end
+    error_messages
+  end
+end end end # class LogStash::Filters::JdbcStreaming
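Reading the filter above end-to-end: `cache_lookup` returns a CachePayload; a failed lookup tags the event with `tag_on_failure`, and an empty payload tags it with `tag_on_default_use` and writes `default_hash` into `target` instead. A minimal pipeline sketch exercising that fallback path follows; it reuses the connection settings from the example comments, and the `default_hash`, columns, and cache values are illustrative choices, not values taken from the diff.

[source,ruby]
------------------------------------------------------------------------------
filter {
  jdbc_streaming {
    jdbc_driver_library => "/path/to/mysql-connector-java-5.1.34-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/mydatabase"
    jdbc_user => "me"
    jdbc_password => "secret"
    statement => "select Name, Region from WORLD.COUNTRY WHERE Code = :code"
    parameters => { "code" => "country_code" }
    target => "country_details"
    # written to [country_details] when the lookup returns no rows;
    # the keys should match the columns selected by the statement
    default_hash => { "Name" => "unknown", "Region" => "unknown" }
    # cache both hits and misses for 30 seconds, up to 1000 parameter sets
    cache_expiration => 30.0
    cache_size => 1000
  }
}
------------------------------------------------------------------------------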
data/lib/logstash/inputs/jdbc.rb
@@ -0,0 +1,341 @@
+# encoding: utf-8
+require "logstash/inputs/base"
+require "logstash/namespace"
+require "logstash/plugin_mixins/jdbc/jdbc"
+
+# this require_relative returns early unless the JRuby version is between 9.2.0.0 and 9.2.8.0
+require_relative "tzinfo_jruby_patch"
+
+# This plugin was created as a way to ingest data from any database
+# with a JDBC interface into Logstash. You can periodically schedule ingestion
+# using a cron syntax (see `schedule` setting) or run the query one time to load
+# data into Logstash. Each row in the resultset becomes a single event.
+# Columns in the resultset are converted into fields in the event.
+#
+# ==== Drivers
+#
+# This plugin does not come packaged with JDBC driver libraries. The desired
+# jdbc driver library must be explicitly passed in to the plugin using the
+# `jdbc_driver_library` configuration option.
+#
+# ==== Scheduling
+#
+# Input from this plugin can be scheduled to run periodically according to a specific
+# schedule. This scheduling syntax is powered by https://github.com/jmettraux/rufus-scheduler[rufus-scheduler].
+# The syntax is cron-like with some extensions specific to Rufus (e.g. timezone support).
+#
+# Examples:
+#
+# |==========================================================
+# | `* 5 * 1-3 *` | will execute every minute of 5am every day of January through March.
+# | `0 * * * *` | will execute on the 0th minute of every hour every day.
+# | `0 6 * * * America/Chicago` | will execute at 6:00am (UTC/GMT -5) every day.
+# |==========================================================
+#
+#
+# Further documentation describing this syntax can be found https://github.com/jmettraux/rufus-scheduler#parsing-cronlines-and-time-strings[here].
+#
+# ==== State
+#
+# The plugin will persist the `sql_last_value` parameter in the form of a
+# metadata file stored in the configured `last_run_metadata_path`. Upon query execution,
+# this file will be updated with the current value of `sql_last_value`. Next time
+# the pipeline starts up, this value will be updated by reading from the file. If
+# `clean_run` is set to true, this value will be ignored and `sql_last_value` will be
+# set to Jan 1, 1970, or 0 if `use_column_value` is true, as if no query has ever been executed.
+#
+# ==== Dealing With Large Result-sets
+#
+# Many JDBC drivers use the `fetch_size` parameter to limit how many
+# results are pre-fetched at a time from the cursor into the client's cache
+# before retrieving more results from the result-set. This is configured in
+# this plugin using the `jdbc_fetch_size` configuration option. No fetch size
+# is set by default in this plugin, so the specific driver's default size will
+# be used.
+#
+# ==== Usage:
+#
+# Here is an example of setting up the plugin to fetch data from a MySQL database.
+# First, we place the appropriate JDBC driver library in our current
+# path (this can be placed anywhere on your filesystem). In this example, we connect to
+# the 'mydb' database using the user 'mysql' and wish to input all rows in the 'songs'
+# table that match a specific artist. The following example demonstrates a possible
+# Logstash configuration for this. The `schedule` option in this example will
+# instruct the plugin to execute this input statement on the minute, every minute.
+#
+# [source,ruby]
+# ------------------------------------------------------------------------------
+# input {
+#   jdbc {
+#     jdbc_driver_library => "mysql-connector-java-5.1.36-bin.jar"
+#     jdbc_driver_class => "com.mysql.jdbc.Driver"
+#     jdbc_connection_string => "jdbc:mysql://localhost:3306/mydb"
+#     jdbc_user => "mysql"
+#     parameters => { "favorite_artist" => "Beethoven" }
+#     schedule => "* * * * *"
+#     statement => "SELECT * from songs where artist = :favorite_artist"
+#   }
+# }
+# ------------------------------------------------------------------------------
+#
+# ==== Configuring SQL statement
+#
+# A SQL statement is required for this input. This can be passed in via the
+# `statement` option in the form of a string, or read from a file (`statement_filepath`). The file
+# option is typically used when the SQL statement is large or cumbersome to supply in the config.
+# The file option only supports one SQL statement. The plugin will only accept one of the options.
+# It cannot read a statement from a file as well as from the `statement` configuration parameter.
+#
+# ==== Configuring multiple SQL statements
+#
+# Configuring multiple SQL statements is useful when there is a need to query and ingest data
+# from different database tables or views. It is possible to define separate Logstash
+# configuration files for each statement or to define multiple statements in a single configuration
+# file. When using multiple statements in a single Logstash configuration file, each statement
+# has to be defined as a separate jdbc input (including jdbc driver, connection string and other
+# required parameters).
+#
+# Please note that if any of the statements use the `sql_last_value` parameter (e.g. for
+# ingesting only data changed since last run), each input should define its own
+# `last_run_metadata_path` parameter. Failure to do so will result in undesired behaviour, as
+# all inputs will store their state to the same (default) metadata file, effectively
+# overwriting each other's `sql_last_value`.
+#
+# ==== Predefined Parameters
+#
+# Some parameters are built-in and can be used from within your queries.
+# Here is the list:
+#
+# |==========================================================
+# |sql_last_value | The value used to calculate which rows to query. Before any query is run,
+# this is set to Thursday, 1 January 1970, or 0 if `use_column_value` is true and
+# `tracking_column` is set. It is updated accordingly after subsequent queries are run.
+# |==========================================================
+#
+# Example:
+# [source,ruby]
+# ---------------------------------------------------------------------------------------------------
+# input {
+#   jdbc {
+#     statement => "SELECT id, mycolumn1, mycolumn2 FROM my_table WHERE id > :sql_last_value"
+#     use_column_value => true
+#     tracking_column => "id"
+#     # ... other configuration bits
+#   }
+# }
+# ---------------------------------------------------------------------------------------------------
+#
+module LogStash module Inputs class Jdbc < LogStash::Inputs::Base
+  include LogStash::PluginMixins::Jdbc::Jdbc
+  config_name "jdbc"
+
+  # If undefined, Logstash will complain, even if codec is unused.
+  default :codec, "plain"
+
+  # Statement to execute
+  #
+  # To use parameters, use named parameter syntax.
+  # For example:
+  #
+  # [source, ruby]
+  # -----------------------------------------------
+  # "SELECT * FROM MYTABLE WHERE id = :target_id"
+  # -----------------------------------------------
+  #
+  # here, ":target_id" is a named parameter. You can configure named parameters
+  # with the `parameters` setting.
+  config :statement, :validate => :string
+
+  # Path of file containing statement to execute
+  config :statement_filepath, :validate => :path
+
+  # Hash of query parameters, for example `{ "target_id" => "321" }`
+  config :parameters, :validate => :hash, :default => {}
+
+  # Schedule of when to periodically run statement, in Cron format
+  # for example: "* * * * *" (execute query every minute, on the minute)
+  #
+  # There is no schedule by default. If no schedule is given, then the statement is run
+  # exactly once.
+  config :schedule, :validate => :string
+
+  # Path to file with last run time
+  config :last_run_metadata_path, :validate => :string, :default => "#{ENV['HOME']}/.logstash_jdbc_last_run"
+
+  # Use an incremental column value rather than a timestamp
+  config :use_column_value, :validate => :boolean, :default => false
+
+  # If tracking column value rather than timestamp, the column whose value is to be tracked
+  config :tracking_column, :validate => :string
+
+  # Type of tracking column. Currently only "numeric" and "timestamp"
+  config :tracking_column_type, :validate => ['numeric', 'timestamp'], :default => 'numeric'
+
+  # Whether the previous run state should be preserved
+  config :clean_run, :validate => :boolean, :default => false
+
+  # Whether to save state or not in last_run_metadata_path
+  config :record_last_run, :validate => :boolean, :default => true
+
+  # Whether to force the lowercasing of identifier fields
+  config :lowercase_column_names, :validate => :boolean, :default => true
+
+  # The character encoding of all columns; leave empty if the columns are already properly UTF-8
+  # encoded. Specific column charsets set via :columns_charset can override this setting.
+  config :charset, :validate => :string
+
+  # The character encoding for specific columns. This option will override the `:charset` option
+  # for the specified columns.
+  #
+  # Example:
+  # [source,ruby]
+  # -------------------------------------------------------
+  # input {
+  #   jdbc {
+  #     ...
+  #     columns_charset => { "column0" => "ISO-8859-1" }
+  #     ...
+  #   }
+  # }
+  # -------------------------------------------------------
+  # this will only convert column0, which has ISO-8859-1 as its original encoding.
+  config :columns_charset, :validate => :hash, :default => {}
+
+  config :use_prepared_statements, :validate => :boolean, :default => false
+
+  config :prepared_statement_name, :validate => :string, :default => ""
+
+  config :prepared_statement_bind_values, :validate => :array, :default => []
+
+  attr_reader :database # for test mocking/stubbing
+
+  public
+
+  def register
+    @logger = self.logger
+    require "rufus/scheduler"
+    prepare_jdbc_connection
+
+    if @use_column_value
+      # Raise an error if @use_column_value is true, but no @tracking_column is set
+      if @tracking_column.nil?
+        raise(LogStash::ConfigurationError, "Must set :tracking_column if :use_column_value is true.")
+      end
+    end
+
+    unless @statement.nil? ^ @statement_filepath.nil?
+      raise(LogStash::ConfigurationError, "Must set either :statement or :statement_filepath. Only one may be set at a time.")
+    end
+
+    @statement = ::File.read(@statement_filepath) if @statement_filepath
+
+    # must validate prepared statement mode after trying to read in from @statement_filepath
+    if @use_prepared_statements
+      validation_errors = validate_prepared_statement_mode
+      unless validation_errors.empty?
+        raise(LogStash::ConfigurationError, "Prepared Statement Mode validation errors: " + validation_errors.join(", "))
+      end
+    end
+
+    set_value_tracker(LogStash::PluginMixins::Jdbc::ValueTracking.build_last_value_tracker(self))
+    set_statement_logger(LogStash::PluginMixins::Jdbc::CheckedCountLogger.new(@logger))
+
+    @enable_encoding = !@charset.nil? || !@columns_charset.empty?
+
+    if (@jdbc_password_filepath and @jdbc_password)
+      raise(LogStash::ConfigurationError, "Only one of :jdbc_password, :jdbc_password_filepath may be set at a time.")
+    end
+
+    @jdbc_password = LogStash::Util::Password.new(::File.read(@jdbc_password_filepath).strip) if @jdbc_password_filepath
+
+    if enable_encoding?
+      encodings = @columns_charset.values
+      encodings << @charset if @charset
+
+      @converters = encodings.each_with_object({}) do |encoding, converters|
+        converter = LogStash::Util::Charset.new(encoding)
+        converter.logger = self.logger
+        converters[encoding] = converter
+      end
+    end
+  end # def register
+
+  # test injection points
+  def set_statement_logger(instance)
+    @statement_handler = LogStash::PluginMixins::Jdbc::StatementHandler.build_statement_handler(self, instance)
+  end
+
+  def set_value_tracker(instance)
+    @value_tracker = instance
+  end
+
+  def run(queue)
+    if @schedule
+      @scheduler = Rufus::Scheduler.new(:max_work_threads => 1)
+      @scheduler.cron @schedule do
+        execute_query(queue)
+      end
+
+      @scheduler.join
+    else
+      execute_query(queue)
+    end
+  end # def run
+
+  def stop
+    close_jdbc_connection
+    @scheduler.shutdown(:wait) if @scheduler
+  end
+
+  private
+
+  def validate_prepared_statement_mode
+    error_messages = []
+    if @prepared_statement_name.empty?
+      error_messages << "must provide a name for the Prepared Statement, it must be unique for the db session"
+    end
+    if @statement.count("?") != @prepared_statement_bind_values.size
+      # mismatch in number of bind value elements to placeholder characters
+      error_messages << "there is a mismatch between the number of statement `?` placeholders and :prepared_statement_bind_values array setting elements"
+    end
+    if @jdbc_paging_enabled
+      # Pagination is not supported when using prepared statements
+      error_messages << "JDBC pagination cannot be used at this time"
+    end
+    error_messages
+  end
+
+  def execute_query(queue)
+    execute_statement do |row|
+      if enable_encoding?
+        ## do the necessary conversions to string elements
+        row = Hash[row.map { |k, v| [k.to_s, convert(k, v)] }]
+      end
+      event = LogStash::Event.new(row)
+      decorate(event)
+      queue << event
+    end
+    @value_tracker.write
+  end
+
+  private
+
+  def enable_encoding?
+    @enable_encoding
+  end
+
+  # make sure the encoding is uniform over fields
+  def convert(column_name, value)
+    return value unless value.is_a?(String)
+    column_charset = @columns_charset[column_name]
+    if column_charset
+      converter = @converters[column_charset]
+      converter.convert(value)
+    elsif @charset
+      converter = @converters[@charset]
+      converter.convert(value)
+    else
+      value
+    end
+  end
+end end end # class LogStash::Inputs::Jdbc
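The multiple-statements warning in the comments above is easy to trip over, so here is a sketch of two `jdbc` inputs in one pipeline, each tracking its own `sql_last_value`. The connection settings mirror the MySQL example from the plugin's own documentation; the table names and metadata paths are illustrative assumptions.

[source,ruby]
------------------------------------------------------------------------------
input {
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.36-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/mydb"
    jdbc_user => "mysql"
    schedule => "* * * * *"
    statement => "SELECT * FROM songs WHERE id > :sql_last_value"
    use_column_value => true
    tracking_column => "id"
    # each statement gets its own state file; otherwise the two inputs
    # would overwrite each other's sql_last_value
    last_run_metadata_path => "/var/lib/logstash/.jdbc_last_run_songs"
  }
  jdbc {
    jdbc_driver_library => "mysql-connector-java-5.1.36-bin.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/mydb"
    jdbc_user => "mysql"
    schedule => "* * * * *"
    statement => "SELECT * FROM albums WHERE id > :sql_last_value"
    use_column_value => true
    tracking_column => "id"
    last_run_metadata_path => "/var/lib/logstash/.jdbc_last_run_albums"
  }
}
------------------------------------------------------------------------------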
data/lib/logstash/inputs/tzinfo_jruby_patch.rb
@@ -0,0 +1,57 @@
+# encoding: UTF-8
+# frozen_string_literal: true
+
+# There is a bug in JRuby versions between 9.2.0.0 and 9.2.8.0:
+# the TZInfo::Timestamp `new_datetime` can build Rational numbers having
+# numerators and denominators that are too large for Java longs.
+#
+# This patch reopens the TZInfo::Timestamp class and redefines
+# the `new_datetime` method.
+# It scales down the numerator and denominator if they are larger than
+# a Java Long. There is no appreciable precision loss at the microsecond level.
+
+tzinfo_jruby_bugfixed_version = "9.2.8.0"
+tzinfo_jruby_bugadded_version = "9.2.0.0"
+
+current_jruby_version = Gem::Version.new(JRUBY_VERSION)
+broken_jruby_version = Gem::Version.new(tzinfo_jruby_bugadded_version)
+patched_jruby_version = Gem::Version.new(tzinfo_jruby_bugfixed_version)
+
+return unless current_jruby_version >= broken_jruby_version && current_jruby_version < patched_jruby_version
+
+require 'tzinfo'
+
+if defined?(TZInfo::VERSION) && TZInfo::VERSION > '2'
+  module TZInfo
+    # A time represented as an `Integer` number of seconds since 1970-01-01
+    # 00:00:00 UTC (ignoring leap seconds), the fraction through the second
+    # (sub_second as a `Rational`) and an optional UTC offset. Like Ruby's `Time`
+    # class, {Timestamp} can distinguish between a local time with a zero offset
+    # and a time specified explicitly as UTC.
+    class Timestamp
+
+      protected
+
+      def new_datetime(klass = DateTime)
+        val = JD_EPOCH + ((@value.to_r + @sub_second) / 86400)
+        datetime = klass.jd(jruby_scale_down_rational(val))
+        @utc_offset && @utc_offset != 0 ? datetime.new_offset(Rational(@utc_offset, 86400)) : datetime
+      end
+
+      private
+
+      # while this JRuby bug exists in 9.2.X.X https://github.com/jruby/jruby/issues/5791
+      # we must scale down the numerator and denominator to fit Java Long values.
+
+      def jruby_scale_down_rational(rat)
+        return rat if rat.numerator <= java.lang.Long::MAX_VALUE
+        [10, 100, 1000].each do |scale_by|
+          new_numerator = rat.numerator / scale_by
+          if new_numerator <= java.lang.Long::MAX_VALUE
+            return Rational(new_numerator, rat.denominator / scale_by)
+          end
+        end
+      end
+    end
+  end
+end
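The arithmetic behind `jruby_scale_down_rational` is worth seeing with numbers: dividing both the numerator and denominator by the same power of ten leaves the Rational's value essentially unchanged, with the error from integer division far below microsecond precision. A plain-Ruby illustration follows (a sketch only; `java.lang.Long::MAX_VALUE` is written out as a literal so it runs outside JRuby, and the sample Rational is a hypothetical oversized value, not one taken from the patch):

[source,ruby]
------------------------------------------------------------------------------
LONG_MAX = 9_223_372_036_854_775_807 # java.lang.Long::MAX_VALUE

# Same strategy as the patch: divide numerator and denominator
# by 10, 100, or 1000 until the numerator fits in a Java long.
def scale_down(rat)
  return rat if rat.numerator <= LONG_MAX
  [10, 100, 1000].each do |scale_by|
    new_numerator = rat.numerator / scale_by
    return Rational(new_numerator, rat.denominator / scale_by) if new_numerator <= LONG_MAX
  end
  rat
end

big = Rational(10**20 + 7, 86_400 * 10**9) # numerator exceeds LONG_MAX
scaled = scale_down(big)                   # scales by 100 on the third attempt

puts scaled.numerator <= LONG_MAX  # => true
puts (big - scaled).abs.to_f       # => ~8.1e-14, a negligible shift in value
------------------------------------------------------------------------------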
data/lib/logstash/plugin_mixins/jdbc/checked_count_logger.rb
@@ -0,0 +1,43 @@
+# encoding: utf-8
+
+module LogStash module PluginMixins module Jdbc
+  class CheckedCountLogger
+    def initialize(logger)
+      @logger = logger
+      @needs_check = true
+      @count_is_supported = false
+      @in_debug = @logger.debug?
+    end
+
+    def disable_count
+      @needs_check = false
+      @count_is_supported = false
+    end
+
+    def log_statement_parameters(statement, parameters, query)
+      return unless @in_debug
+      check_count_query(query) if @needs_check && query
+      if @count_is_supported
+        @logger.debug("Executing JDBC query", :statement => statement, :parameters => parameters, :count => execute_count(query))
+      else
+        @logger.debug("Executing JDBC query", :statement => statement, :parameters => parameters)
+      end
+    end
+
+    def check_count_query(query)
+      @needs_check = false
+      begin
+        execute_count(query)
+        @count_is_supported = true
+      rescue Exception => e
+        @logger.warn("Attempting a count query raised an error, the generated count statement is most likely incorrect but check networking, authentication or your statement syntax", "exception" => e.message)
+        @logger.warn("Ongoing count statement generation is being prevented")
+        @count_is_supported = false
+      end
+    end
+
+    def execute_count(query)
+      query.count
+    end
+  end
+end end end
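To make the check-once behaviour of CheckedCountLogger concrete, here is a small driver sketch. It assumes the file above can be required on its own, and it fakes both the structured logger (stdlib `Logger#debug` does not accept key/value pairs, so a stub is used) and a Sequel-style query object whose `#count` raises:

[source,ruby]
------------------------------------------------------------------------------
require "logstash/plugin_mixins/jdbc/checked_count_logger"

# Minimal stand-in for LogStash's structured logger.
class StubLogger
  def debug?; true; end
  def debug(message, *fields); puts "DEBUG #{message} #{fields.inspect}"; end
  def warn(message, *fields);  puts "WARN  #{message} #{fields.inspect}"; end
end

# Stand-in for a Sequel dataset whose generated count statement fails.
query = Object.new
def query.count; raise "COUNT not supported"; end

checked = LogStash::PluginMixins::Jdbc::CheckedCountLogger.new(StubLogger.new)
checked.log_statement_parameters("SELECT * FROM songs", [], query)
# first call: attempts the count, warns twice, and disables counting
checked.log_statement_parameters("SELECT * FROM songs", [], query)
# later calls: log only the statement and parameters, no count attempt
------------------------------------------------------------------------------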