fluent-plugin-postgresql-csvlog 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9fd4d916e4d8cc9ebb39d62e847de1c2dca1ce0175d738ca55430a962b3283ab
4
+ data.tar.gz: b3e6baacf5bd683c39eca587c55ea0326ffe18a55d3b4f8b23ba435642641b8c
5
+ SHA512:
6
+ metadata.gz: 2e783b77515eb3ec55b4684e11d3014fdea761b057d22fb3935eafd7064d9fcb7038b8f78f7fa7e9aa238d339c8a5c3d1cac358a430c207f6ba9cd19bf8ae55a
7
+ data.tar.gz: 77dace38e2de7a851547ee25502943f0d610cceefa8ee4d1194e084301852bf3b1e8fd93e79144f6c81865d7aadfe3c3705a210f3ca4dee2029e2daa5fbdf989
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fluent-plugin-postgresql-csvlog (0.0.1)
5
+ fluentd (>= 1.0, < 2)
6
+ pg_query (~> 1.3)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ concurrent-ruby (1.1.8)
12
+ cool.io (1.7.0)
13
+ fluentd (1.12.0)
14
+ bundler
15
+ cool.io (>= 1.4.5, < 2.0.0)
16
+ http_parser.rb (>= 0.5.1, < 0.7.0)
17
+ msgpack (>= 1.3.1, < 2.0.0)
18
+ serverengine (>= 2.2.2, < 3.0.0)
19
+ sigdump (~> 0.2.2)
20
+ strptime (>= 0.2.2, < 1.0.0)
21
+ tzinfo (>= 1.0, < 3.0)
22
+ tzinfo-data (~> 1.0)
23
+ yajl-ruby (~> 1.0)
24
+ http_parser.rb (0.6.0)
25
+ msgpack (1.4.2)
26
+ pg_query (1.3.0)
27
+ power_assert (2.0.0)
28
+ rake (13.0.3)
29
+ serverengine (2.2.2)
30
+ sigdump (~> 0.2.2)
31
+ sigdump (0.2.4)
32
+ strptime (0.2.5)
33
+ test-unit (3.4.0)
34
+ power_assert
35
+ tzinfo (2.0.4)
36
+ concurrent-ruby (~> 1.0)
37
+ tzinfo-data (1.2021.1)
38
+ tzinfo (>= 1.0.0)
39
+ yajl-ruby (1.4.1)
40
+
41
+ PLATFORMS
42
+ ruby
43
+
44
+ DEPENDENCIES
45
+ fluent-plugin-postgresql-csvlog!
46
+ rake
47
+ test-unit (~> 3.2)
48
+
49
+ BUNDLED WITH
50
+ 2.1.4
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2011-present GitLab B.V.
2
+
3
+ Portions of this software are licensed as follows:
4
+
5
+ * All content residing under the "doc/" directory of this repository is licensed under "Creative Commons: CC BY-SA 4.0 license".
6
+ * All content that resides under the "ee/" directory of this repository, if that directory exists, is licensed under the license defined in "ee/LICENSE".
7
+ * All client-side JavaScript (when served directly or after being compiled, arranged, augmented, or combined), is licensed under the "MIT Expat" license.
8
+ * All third party components incorporated into the GitLab Software are licensed under the original license provided by the owner of the applicable component.
9
+ * Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,64 @@
1
+ # Introduction
2
+
3
+ This repository holds several [Fluentd](http://fluentd.org) plugins to
4
+ parse PostgreSQL CSV log files and extract slow log information:
5
+
6
+ - `MultilineCSVParser`: Parses CSV logs whose records span multiple lines
7
+ - `PostgreSQLSlowLog`: Extracts slow log entries into `duration_s` and `statement` fields
8
+ - `PostgreSQLRedactor`: Normalizes the SQL query and redacts sensitive information
9
+
10
+ ## Installation
11
+
12
+ Use RubyGems:
13
+
14
+ fluent-gem install fluent-plugin-postgresql-csvlog --no-document
15
+
16
+ ## Configuration
17
+
18
+ ### PostgreSQL configuration
19
+
20
+ ```conf
21
+ log_destination = 'csvlog'
22
+ logging_collector = on
23
+ ```
24
+
25
+ ### Fluentd conf
26
+
27
+ The configuration below shows how you might use these filters to
28
+ ingest and parse PostgreSQL CSV logs:
29
+
30
+ ```conf
31
+ ## PostgreSQL csvlog (enabled with log_destination = 'csvlog')
32
+ <source>
33
+ @type tail
34
+ tag postgres.postgres_csv
35
+ path /var/log/postgresql/*.csv
36
+ pos_file /var/log/td-agent/postgres_csv.log.pos
37
+ <parse>
38
+ @type multiline_csv
39
+ format_firstline /^\d{4}-\d{1,2}-\d{1,2} \d{2}:\d{2}:\d{2}/
40
+ keys time,user_name,database_name,process_id,connection_from,session_id,session_line_num,command_tag,session_start_time,virtual_transaction_id,transaction_id,error_severity,sql_state_code,message,detail,hint,internal_query,internal_query_pos,context,query,query_pos,location,application_name
41
+ time_key time
42
+ time_format %Y-%m-%d %H:%M:%S.%N %Z
43
+ keep_time_key true
44
+ parser_type normal
45
+ </parse>
46
+ </source>
47
+
48
+ <filter postgres.postgres_csv>
49
+ @type postgresql_slowlog
50
+ </filter>
51
+
52
+ <filter postgres.postgres_csv>
53
+ @type postgresql_redactor
54
+ </filter>
55
+
56
+ # Output resulting JSON file to a directory in /tmp
57
+ <match postgres.*>
58
+ @type file
59
+ path /tmp/fluent-test
60
+ <format>
61
+ @type json
62
+ </format>
63
+ </match>
64
+ ```
@@ -0,0 +1,22 @@
1
+ $:.push File.expand_path('lib', __dir__)
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = 'fluent-plugin-postgresql-csvlog'
5
+ s.version = '0.0.1'
6
+ s.authors = ['stanhu']
7
+ s.email = ['stanhu@gmail.com']
8
+ s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
9
+ s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
+ s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
+
12
+ s.files = `git ls-files`.split("\n")
13
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
15
+ s.require_paths = ['lib']
16
+
17
+ s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
+ s.add_dependency 'pg_query', '~> 1.3'
19
+
20
+ s.add_development_dependency 'rake'
21
+ s.add_development_dependency 'test-unit', '~> 3.2'
22
+ end
@@ -0,0 +1,30 @@
1
+ require 'fluent/plugin/filter'
2
+ require 'pg_query'
3
+
4
+ module Fluent::Plugin
5
+ class PostgreSQLRedactor < Filter
6
+ Fluent::Plugin.register_filter('postgresql_redactor', self)
7
+
8
+ def configure(conf)
9
+ super
10
+ end
11
+
12
+ def filter(_tag, _time, record)
13
+ statement = record['statement']
14
+
15
+ return record unless statement
16
+
17
+ normalized = PgQuery.normalize(statement)
18
+ record.delete('statement')
19
+ record['sql'] = normalized
20
+ record.delete('message')
21
+
22
+ record
23
+ rescue PgQuery::ParseError
24
+ # pg_query currently only supports PostgreSQL 10:
25
+ # https://github.com/lfittl/pg_query/issues/184
26
+ record['pg_query_error'] = true
27
+ record
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/filter'
4
+
5
+ module Fluent
6
+ module Plugin
7
+ # Filters PostgreSQL slow log duration and statements from parsed record.
8
+ #
9
+ # Examples:
10
+ # duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects
11
+ # duration: 1873.345 ms execute <unnamed>: SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/
12
+ class PostgreSQLSlowLog < Filter
13
+ Fluent::Plugin.register_filter('postgresql_slowlog', self)
14
+
15
+ SLOWLOG_REGEXP = /^duration: (\d+(?:\.\d+)?) ms .*?:\s*(.*)/m.freeze
16
+
17
+ def filter(_tag, _time, record)
18
+ return record unless record.key?('message')
19
+
20
+ # rubocop:disable Style/PerlBackrefs
21
+ if record['message'] =~ SLOWLOG_REGEXP
22
+ record['duration_s'] = $1.to_f / 1000.0
23
+ record['statement'] = $2
24
+ end
25
+ # rubocop:enable Style/PerlBackrefs
26
+
27
+ record
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/parser_csv'
4
+
5
+ module Fluent
6
+ module Plugin
7
+ # This class can be used to parse CSV files whose records span multiple lines.
8
+ # Like the [multiline parser](https://docs.fluentd.org/parser/multiline),
9
+ # define a `format_firstline` for the `tail` input module to match against.
10
+ # Then use the [csv parser](https://docs.fluentd.org/parser/csv) parameters.
11
+ class MultilineCSVParser < CSVParser
12
+ Plugin.register_parser('multiline_csv', self)
13
+
14
+ desc 'Specify regexp pattern for start line of multiple lines'
15
+ config_param :format_firstline, :string, default: nil
16
+
17
+ def configure(conf)
18
+ super
19
+
20
+ if @format_firstline
21
+ check_format_regexp(@format_firstline, 'format_firstline')
22
+ @firstline_regex = Regexp.new(@format_firstline[1..-2])
23
+ end
24
+ end
25
+
26
+ # Used by in_tail:
27
+ # https://github.com/fluent/fluentd/blob/47be96e3a98fa247b59e479f7c62bfeff1a9ec55/lib/fluent/plugin/in_tail.rb#L523
28
+ def has_firstline?
29
+ !!@format_firstline
30
+ end
31
+
32
+ # Used by in_tail:
33
+ # https://github.com/fluent/fluentd/blob/47be96e3a98fa247b59e479f7c62bfeff1a9ec55/lib/fluent/plugin/in_tail.rb#L526
34
+ def firstline?(text)
35
+ @firstline_regex.match(text)
36
+ end
37
+
38
+ private
39
+
40
+ def check_format_regexp(format, key)
41
+ if format[0] == '/' && format[-1] == '/'
42
+ begin
43
+ Regexp.new(format[1..-2])
44
+ rescue StandardError => e
45
+ raise ConfigError, "Invalid regexp in #{key}: #{e}"
46
+ end
47
+ else
48
+ raise ConfigError, "format_firstline should be Regexp, need //: '#{format}'"
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/setup'
4
+ require 'test/unit'
5
+
6
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
+ $LOAD_PATH.unshift(__dir__)
8
+ require 'fluent/test'
9
+ require 'fluent/test/driver/filter'
10
+ require 'fluent/test/helpers'
11
+
12
+ Test::Unit::TestCase.include(Fluent::Test::Helpers)
13
+
14
+ require 'fluent/plugin/filter_postgresql_slowlog'
15
+ require 'fluent/plugin/filter_postgresql_redactor'
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PostgreSQLRedactorTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ @tag = 'test.tag'
9
+ end
10
+
11
+ CONFIG = '
12
+ <filter test.tag>
13
+ @type postgresql_redactor
14
+ </filter>
15
+ '
16
+
17
+ def create_driver(conf = CONFIG)
18
+ Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLRedactor).configure(conf)
19
+ end
20
+
21
+ test 'filters SQL queries' do
22
+ d = create_driver
23
+
24
+ inputs = [
25
+ { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
26
+ 'statement' => %(SELECT * FROM projects WHERE id = 1),
27
+ 'duration_s' => 2.3571 }
28
+ ]
29
+
30
+ d.run(default_tag: @tag) do
31
+ inputs.each { |input| d.feed(input) }
32
+ end
33
+
34
+ assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
35
+ assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
36
+ end
37
+
38
+ test 'handles parse errors' do
39
+ d = create_driver
40
+
41
+ input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
42
+
43
+ d.run(default_tag: @tag) do
44
+ d.feed(input)
45
+ end
46
+
47
+ assert_equal(%w[pg_query_error statement], d.filtered[0].last.keys.sort)
48
+ assert_equal(input['statement'], d.filtered[0].last['statement'])
49
+ end
50
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PostgreSQLSlowLogTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ @tag = 'test.tag'
9
+ end
10
+
11
+ CONFIG = '
12
+ <filter test.tag>
13
+ @type postgresql_slowlog
14
+ </filter>
15
+ '
16
+
17
+ def create_driver(conf = CONFIG)
18
+ Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLSlowLog).configure(conf)
19
+ end
20
+
21
+ test 'filters basic slow log' do
22
+ d = create_driver
23
+
24
+ inputs = [
25
+ { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects' },
26
+ { 'message' => 'duration: 1873.345 ms execute <unnamed>: SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' }
27
+ ]
28
+
29
+ d.run(default_tag: @tag) do
30
+ inputs.each { |input| d.feed(input) }
31
+ end
32
+
33
+ assert_equal(inputs[0].merge(
34
+ {
35
+ 'statement' => 'SELECT * FROM projects',
36
+ 'duration_s' => 2.3571
37
+ }
38
+ ),
39
+ d.filtered[0].last)
40
+ assert_equal(inputs[1].merge(
41
+ {
42
+ 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
43
+ 'duration_s' => 1.873345
44
+ }
45
+ ),
46
+ d.filtered[1].last)
47
+ end
48
+
49
+ test 'ignores messages not having to do with slow logs' do
50
+ d = create_driver
51
+ input = { 'message' => 'this is a test' }
52
+
53
+ d.run(default_tag: @tag) do
54
+ d.feed(input)
55
+ end
56
+
57
+ assert_equal(input, d.filtered[0].last)
58
+ end
59
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-postgresql-csvlog
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - stanhu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg_query
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.3'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.3'
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: test-unit
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.2'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '3.2'
75
+ description: fluentd plugins to work with PostgreSQL CSV logs
76
+ email:
77
+ - stanhu@gmail.com
78
+ executables: []
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - Gemfile
83
+ - Gemfile.lock
84
+ - LICENSE
85
+ - README.md
86
+ - fluent-plugin-postgresql-csvlog.gemspec
87
+ - lib/fluent/plugin/filter_postgresql_redactor.rb
88
+ - lib/fluent/plugin/filter_postgresql_slowlog.rb
89
+ - lib/fluent/plugin/parser_multiline_csv.rb
90
+ - test/helper.rb
91
+ - test/plugin/test_filter_postgresql_redactor.rb
92
+ - test/plugin/test_filter_postgresql_slowlog.rb
93
+ homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
94
+ licenses: []
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.1.4
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: fluentd plugins to work with PostgreSQL CSV logs
115
+ test_files: []