fluent-plugin-postgresql-csvlog 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9fd4d916e4d8cc9ebb39d62e847de1c2dca1ce0175d738ca55430a962b3283ab
4
+ data.tar.gz: b3e6baacf5bd683c39eca587c55ea0326ffe18a55d3b4f8b23ba435642641b8c
5
+ SHA512:
6
+ metadata.gz: 2e783b77515eb3ec55b4684e11d3014fdea761b057d22fb3935eafd7064d9fcb7038b8f78f7fa7e9aa238d339c8a5c3d1cac358a430c207f6ba9cd19bf8ae55a
7
+ data.tar.gz: 77dace38e2de7a851547ee25502943f0d610cceefa8ee4d1194e084301852bf3b1e8fd93e79144f6c81865d7aadfe3c3705a210f3ca4dee2029e2daa5fbdf989
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source 'https://rubygems.org'
2
+
3
+ gemspec
data/Gemfile.lock ADDED
@@ -0,0 +1,50 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ fluent-plugin-postgresql-csvlog (0.0.1)
5
+ fluentd (>= 1.0, < 2)
6
+ pg_query (~> 1.3)
7
+
8
+ GEM
9
+ remote: https://rubygems.org/
10
+ specs:
11
+ concurrent-ruby (1.1.8)
12
+ cool.io (1.7.0)
13
+ fluentd (1.12.0)
14
+ bundler
15
+ cool.io (>= 1.4.5, < 2.0.0)
16
+ http_parser.rb (>= 0.5.1, < 0.7.0)
17
+ msgpack (>= 1.3.1, < 2.0.0)
18
+ serverengine (>= 2.2.2, < 3.0.0)
19
+ sigdump (~> 0.2.2)
20
+ strptime (>= 0.2.2, < 1.0.0)
21
+ tzinfo (>= 1.0, < 3.0)
22
+ tzinfo-data (~> 1.0)
23
+ yajl-ruby (~> 1.0)
24
+ http_parser.rb (0.6.0)
25
+ msgpack (1.4.2)
26
+ pg_query (1.3.0)
27
+ power_assert (2.0.0)
28
+ rake (13.0.3)
29
+ serverengine (2.2.2)
30
+ sigdump (~> 0.2.2)
31
+ sigdump (0.2.4)
32
+ strptime (0.2.5)
33
+ test-unit (3.4.0)
34
+ power_assert
35
+ tzinfo (2.0.4)
36
+ concurrent-ruby (~> 1.0)
37
+ tzinfo-data (1.2021.1)
38
+ tzinfo (>= 1.0.0)
39
+ yajl-ruby (1.4.1)
40
+
41
+ PLATFORMS
42
+ ruby
43
+
44
+ DEPENDENCIES
45
+ fluent-plugin-postgresql-csvlog!
46
+ rake
47
+ test-unit (~> 3.2)
48
+
49
+ BUNDLED WITH
50
+ 2.1.4
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) 2011-present GitLab B.V.
2
+
3
+ Portions of this software are licensed as follows:
4
+
5
+ * All content residing under the "doc/" directory of this repository is licensed under "Creative Commons: CC BY-SA 4.0 license".
6
+ * All content that resides under the "ee/" directory of this repository, if that directory exists, is licensed under the license defined in "ee/LICENSE".
7
+ * All client-side JavaScript (when served directly or after being compiled, arranged, augmented, or combined), is licensed under the "MIT Expat" license.
8
+ * All third party components incorporated into the GitLab Software are licensed under the original license provided by the owner of the applicable component.
9
+ * Content outside of the above mentioned directories or restrictions above is available under the "MIT Expat" license as defined below.
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,64 @@
1
+ # Introduction
2
+
3
+ This repository holds several [Fluentd](http://fluentd.org) plugins to
4
+ parse PostgreSQL CSV log files and extract slow log information:
5
+
6
+ - `MultilineCSVParser`: Parses CSV files that span multiple lines
7
+ - `PostgreSQLSlowLog`: Extracts slow log entries into `duration_s` and `statement` fields
8
+ - `PostgreSQLRedactor`: Normalizes the SQL query and redacts sensitive information
9
+
10
+ ## Installation
11
+
12
+ Use RubyGems:
13
+
14
+ fluent-gem install fluent-plugin-postgresql-csvlog --no-document
15
+
16
+ ## Configuration
17
+
18
+ ### PostgreSQL configuration
19
+
20
+ ```conf
21
+ log_destination = 'csvlog'
22
+ logging_collector = on
23
+ ```
24
+
25
+ ### Fluentd conf
26
+
27
+ The configuration below shows how you might use these filters to
28
+ ingest and parse PostgreSQL CSV logs:
29
+
30
+ ```conf
31
+ ## PostgreSQL csvlog (enabled with log_destination = 'csvlog')
32
+ <source>
33
+ @type tail
34
+ tag postgres.postgres_csv
35
+ path /var/log/postgresql/*.csv
36
+ pos_file /var/log/td-agent/postgres_csv.log.pos
37
+ <parse>
38
+ @type multiline_csv
39
+ format_firstline /^\d{4}-\d{1,2}-\d{1,2} \d{2}:\d{2}:\d{2}/
40
+ keys time,user_name,database_name,process_id,connection_from,session_id,session_line_num,command_tag,session_start_time,virtual_transaction_id,transaction_id,error_severity,sql_state_code,message,detail,hint,internal_query,internal_query_pos,context,query,query_pos,location,application_name
41
+ time_key time
42
+ time_format %Y-%m-%d %H:%M:%S.%N %Z
43
+ keep_time_key true
44
+ parser_type normal
45
+ </parse>
46
+ </source>
47
+
48
+ <filter postgres.postgres_csv>
49
+ @type postgresql_slowlog
50
+ </filter>
51
+
52
+ <filter postgres.postgres_csv>
53
+ @type postgresql_redactor
54
+ </filter>
55
+
56
+ # Output resulting JSON file to a directory in /tmp
57
+ <match postgres.*>
58
+ @type file
59
+ path /tmp/fluent-test
60
+ <format>
61
+ @type json
62
+ </format>
63
+ </match>
64
+ ```
@@ -0,0 +1,22 @@
1
+ $:.push File.expand_path('lib', __dir__) # append this gem's lib/ directory to the load path
2
+
3
+ Gem::Specification.new do |s|
4
+   s.name = 'fluent-plugin-postgresql-csvlog'
5
+   s.version = '0.0.1'
6
+   s.authors = ['stanhu']
7
+   s.email = ['stanhu@gmail.com']
8
+   s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
9
+   s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
+   s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
+
12
+   s.files = `git ls-files`.split("\n") # package every file tracked by git (requires building from a git checkout)
13
+   s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+   s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
15
+   s.require_paths = ['lib']
16
+
17
+   s.add_dependency 'fluentd', ['>= 1.0', '< 2'] # runtime: any fluentd release from 1.0 up to, but excluding, 2
18
+   s.add_dependency 'pg_query', '~> 1.3' # runtime: required by the postgresql_redactor filter
19
+
20
+   s.add_development_dependency 'rake'
21
+   s.add_development_dependency 'test-unit', '~> 3.2'
22
+ end
@@ -0,0 +1,30 @@
1
+ require 'fluent/plugin/filter'
2
+ require 'pg_query'
3
+
4
+ module Fluent::Plugin
5
+   class PostgreSQLRedactor < Filter # replaces a record's 'statement' with a normalized 'sql' field
6
+     Fluent::Plugin.register_filter('postgresql_redactor', self)
7
+
8
+     def configure(conf) # no plugin-specific settings; only delegates to super
9
+       super
10
+     end
11
+
12
+     def filter(_tag, _time, record) # returns the (mutated) record; tag and time are unused
13
+       statement = record['statement']
14
+
15
+       return record unless statement # records without a 'statement' pass through untouched
16
+
17
+       normalized = PgQuery.normalize(statement) # raises PgQuery::ParseError before any key is deleted
18
+       record.delete('statement')
19
+       record['sql'] = normalized
20
+       record.delete('message') # presumably dropped because it can still carry the raw SQL; confirm intent
21
+
22
+       record
23
+     rescue PgQuery::ParseError
24
+       # pg_query currently only supports PostgreSQL 10:
25
+       # https://github.com/lfittl/pg_query/issues/184
26
+       record['pg_query_error'] = true # flag the record; 'statement' and 'message' are left in place
27
+       record
28
+     end
29
+   end
30
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/filter'
4
+
5
+ module Fluent
6
+   module Plugin
7
+     # Extracts slow log details from a parsed record: a matching 'message' adds 'duration_s' (seconds) and 'statement'.
8
+     #
9
+     # Examples:
10
+     #   duration: 2357.1 ms  execute <unnamed>: SELECT * FROM projects
11
+     #   duration: 1873.345 ms  execute <unnamed>: SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/
12
+     class PostgreSQLSlowLog < Filter
13
+       Fluent::Plugin.register_filter('postgresql_slowlog', self)
14
+
15
+       SLOWLOG_REGEXP = /^duration: (\d+(?:\.\d+)?) ms .*?:\s*(.*)/m.freeze # $1 = duration in ms; $2 = text after the first ':' following " ms " (may span lines because of /m)
16
+
17
+       def filter(_tag, _time, record) # returns the record, mutated in place when the message matches
18
+         return record unless record.key?('message') # nothing to inspect without a 'message' key
19
+
20
+         # rubocop:disable Style/PerlBackrefs
21
+         if record['message'] =~ SLOWLOG_REGEXP
22
+           record['duration_s'] = $1.to_f / 1000.0 # milliseconds -> seconds
23
+           record['statement'] = $2
24
+         end
25
+         # rubocop:enable Style/PerlBackrefs
26
+
27
+         record
28
+       end
29
+     end
30
+   end
31
+ end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/parser_csv'
4
+
5
+ module Fluent
6
+   module Plugin
7
+     # This class can be used to parse CSV files that span multiple lines.
8
+     # Like the [multiline parser](https://docs.fluentd.org/parser/multiline),
9
+     # define a `format_firstline` for the `tail` input module to match against.
10
+     # Then use the [csv parser](https://docs.fluentd.org/parser/csv) parameters.
11
+     class MultilineCSVParser < CSVParser
12
+       Plugin.register_parser('multiline_csv', self)
13
+
14
+       desc 'Specify regexp pattern for start line of multiple lines'
15
+       config_param :format_firstline, :string, default: nil
16
+
17
+       def configure(conf) # validates format_firstline (when given) and compiles it into @firstline_regex
18
+         super
19
+
20
+         if @format_firstline
21
+           check_format_regexp(@format_firstline, 'format_firstline')
22
+           @firstline_regex = Regexp.new(@format_firstline[1..-2]) # strip the surrounding slashes before compiling
23
+         end
24
+       end
25
+
26
+       # Used by in_tail:
27
+       # https://github.com/fluent/fluentd/blob/47be96e3a98fa247b59e479f7c62bfeff1a9ec55/lib/fluent/plugin/in_tail.rb#L523
28
+       def has_firstline?
29
+         !!@format_firstline # true only when format_firstline was configured
30
+       end
31
+
32
+       # Used by in_tail:
33
+       # https://github.com/fluent/fluentd/blob/47be96e3a98fa247b59e479f7c62bfeff1a9ec55/lib/fluent/plugin/in_tail.rb#L526
34
+       def firstline?(text)
35
+         @firstline_regex.match(text) # MatchData or nil; @firstline_regex is only assigned when format_firstline is configured
36
+       end
37
+
38
+       private
39
+
40
+       def check_format_regexp(format, key) # raises ConfigError unless format is a compilable pattern wrapped in slashes
41
+         if format[0] == '/' && format[-1] == '/'
42
+           begin
43
+             Regexp.new(format[1..-2]) # compiled only to validate; the result is discarded
44
+           rescue StandardError => e
45
+             raise ConfigError, "Invalid regexp in #{key}: #{e}"
46
+           end
47
+         else # NOTE(review): the message below hard-codes 'format_firstline' instead of using key
48
+           raise ConfigError, "format_firstline should be Regexp, need //: '#{format}'"
49
+         end
50
+       end
51
+     end
52
+   end
53
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/setup'
4
+ require 'test/unit'
5
+
6
+ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
+ $LOAD_PATH.unshift(__dir__)
8
+ require 'fluent/test'
9
+ require 'fluent/test/driver/filter'
10
+ require 'fluent/test/helpers'
11
+
12
+ Test::Unit::TestCase.include(Fluent::Test::Helpers)
13
+
14
+ require 'fluent/plugin/filter_postgresql_slowlog'
15
+ require 'fluent/plugin/filter_postgresql_redactor'
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PostgreSQLRedactorTest < Test::Unit::TestCase # drives PostgreSQLRedactor through Fluent::Test::Driver::Filter
6
+   def setup
7
+     Fluent::Test.setup
8
+     @tag = 'test.tag'
9
+   end
10
+
11
+   CONFIG = '
12
+     <filter test.tag>
13
+       @type postgresql_redactor
14
+     </filter>
15
+   '
16
+
17
+   def create_driver(conf = CONFIG) # builds a filter test driver configured with conf
18
+     Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLRedactor).configure(conf)
19
+   end
20
+
21
+   test 'filters SQL queries' do
22
+     d = create_driver
23
+
24
+     inputs = [
25
+       { 'message' => 'duration: 2357.1 ms  execute <unnamed>: SELECT * FROM projects WHERE id = 1',
26
+         'statement' => %(SELECT * FROM projects WHERE id = 1),
27
+         'duration_s' => 2.3571 }
28
+     ]
29
+
30
+     d.run(default_tag: @tag) do
31
+       inputs.each { |input| d.feed(input) }
32
+     end
33
+
34
+     assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort) # 'message' and 'statement' are removed, 'sql' is added
35
+     assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql']) # the literal 1 is replaced by $1
36
+   end
37
+
38
+   test 'handles parse errors' do
39
+     d = create_driver
40
+
41
+     input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
42
+
43
+     d.run(default_tag: @tag) do
44
+       d.feed(input)
45
+     end
46
+
47
+     assert_equal(%w[pg_query_error statement], d.filtered[0].last.keys.sort) # record keeps 'statement' and gains 'pg_query_error'
48
+     assert_equal(input['statement'], d.filtered[0].last['statement'])
49
+   end
50
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PostgreSQLSlowLogTest < Test::Unit::TestCase # drives PostgreSQLSlowLog through Fluent::Test::Driver::Filter
6
+   def setup
7
+     Fluent::Test.setup
8
+     @tag = 'test.tag'
9
+   end
10
+
11
+   CONFIG = '
12
+     <filter test.tag>
13
+       @type postgresql_slowlog
14
+     </filter>
15
+   '
16
+
17
+   def create_driver(conf = CONFIG) # builds a filter test driver configured with conf
18
+     Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLSlowLog).configure(conf)
19
+   end
20
+
21
+   test 'filters basic slow log' do
22
+     d = create_driver
23
+
24
+     inputs = [
25
+       { 'message' => 'duration: 2357.1 ms  execute <unnamed>: SELECT * FROM projects' },
26
+       { 'message' => 'duration: 1873.345 ms  execute <unnamed>: SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' }
27
+     ]
28
+
29
+     d.run(default_tag: @tag) do
30
+       inputs.each { |input| d.feed(input) }
31
+     end
32
+
33
+     assert_equal(inputs[0].merge(
34
+                    {
35
+                      'statement' => 'SELECT * FROM projects',
36
+                      'duration_s' => 2.3571 # 2357.1 ms expressed in seconds
37
+                    }
38
+                  ),
39
+                  d.filtered[0].last)
40
+     assert_equal(inputs[1].merge(
41
+                    {
42
+                      'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
43
+                      'duration_s' => 1.873345 # 1873.345 ms expressed in seconds
44
+                    }
45
+                  ),
46
+                  d.filtered[1].last)
47
+   end
48
+
49
+   test 'ignores messages not having to do with slow logs' do
50
+     d = create_driver
51
+     input = { 'message' => 'this is a test' }
52
+
53
+     d.run(default_tag: @tag) do
54
+       d.feed(input)
55
+     end
56
+
57
+     assert_equal(input, d.filtered[0].last) # non-matching messages come back unchanged
58
+   end
59
+ end
metadata ADDED
@@ -0,0 +1,115 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-postgresql-csvlog
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - stanhu
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-02-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '1.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg_query
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.3'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.3'
47
+ - !ruby/object:Gem::Dependency
48
+ name: rake
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: test-unit
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '3.2'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '3.2'
75
+ description: fluentd plugins to work with PostgreSQL CSV logs
76
+ email:
77
+ - stanhu@gmail.com
78
+ executables: []
79
+ extensions: []
80
+ extra_rdoc_files: []
81
+ files:
82
+ - Gemfile
83
+ - Gemfile.lock
84
+ - LICENSE
85
+ - README.md
86
+ - fluent-plugin-postgresql-csvlog.gemspec
87
+ - lib/fluent/plugin/filter_postgresql_redactor.rb
88
+ - lib/fluent/plugin/filter_postgresql_slowlog.rb
89
+ - lib/fluent/plugin/parser_multiline_csv.rb
90
+ - test/helper.rb
91
+ - test/plugin/test_filter_postgresql_redactor.rb
92
+ - test/plugin/test_filter_postgresql_slowlog.rb
93
+ homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
94
+ licenses: []
95
+ metadata: {}
96
+ post_install_message:
97
+ rdoc_options: []
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ version: '0'
105
+ required_rubygems_version: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ requirements: []
111
+ rubygems_version: 3.1.4
112
+ signing_key:
113
+ specification_version: 4
114
+ summary: fluentd plugins to work with PostgreSQL CSV logs
115
+ test_files: []