fluent-plugin-postgresql-csvlog 0.0.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 002b7169e3f3ac6493eb09dca5c2e3820a944f38c64a0b45641f3cdbd1717ddf
4
- data.tar.gz: eac3646a2404665924c4fa30114b91ecce49dbadef3919d89eab458450568328
3
+ metadata.gz: 81cfb53854390b0aae43fd6f8cfaf05e11c4251a4442bf40ccdee016db11b3f6
4
+ data.tar.gz: 0e705c2af91af84e400e96d41b5275f0d01f4d6dc4ec0a37b3bfed0974e8a424
5
5
  SHA512:
6
- metadata.gz: 354478f3573f0934dcee72305069aa8f25333087d1667fc4e127963b8f8f955b66a02462925e4672f27f80e43b772640a68a4760266f461704bd9486ad66a3ef
7
- data.tar.gz: f6d89c2db73d337b1aa52e9838ab08f6f3bda80e7053307482fa5b284b4b6f81ae0389e2a20411fbcc383c878b5309a320946d5849d43fa466e469037b9d5102
6
+ metadata.gz: 24bb505d30f06847e16b2f631e1e2ce011559edcc11724556d62abc0089ee99b00e5b911c1f23d87740720fd4d57235af1ee104991cbff72ac8ba790c700280a
7
+ data.tar.gz: d42ae8976276b4392c1963d6c83e7691732f7eec8dbf87eaf270e3b4547ceba8f9f2b76056006163d6b594e0791b4a9b49183c6ae1793610e7f6555a92b9d190
data/.gitlab-ci.yml CHANGED
@@ -2,9 +2,28 @@ image: "ruby:2.7"
2
2
 
3
3
  test:
4
4
  before_script:
5
+ - bundle config set path vendor
5
6
  - bundle install --jobs $(nproc)
6
7
  script:
7
8
  - bundle exec rake test
8
9
  cache:
9
10
  paths:
10
11
  - vendor/ruby
12
+
13
+ # integration tests
14
+ itest:
15
+ services:
16
+ - name: postgres:12
17
+ alias: postgres
18
+ command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
19
+ variables:
20
+ POSTGRES_USER: testuser
21
+ POSTGRES_PASSWORD: testpass
22
+ before_script:
23
+ - bundle config set path vendor
24
+ - bundle install --jobs $(nproc)
25
+ script:
26
+ - bundle exec rake itest
27
+ cache:
28
+ paths:
29
+ - vendor/ruby
data/Dockerfile ADDED
@@ -0,0 +1,18 @@
1
+ # Dockerfile useful for manual testing purposes
2
+ FROM fluent/fluentd:v1.13-1
3
+ USER root
4
+ RUN apk add bash alpine-sdk postgresql-dev postgresql-client ruby-dev
5
+
6
+ WORKDIR /src/
7
+
8
+ ADD Gemfile /src/
9
+ ADD fluent-plugin-postgresql-csvlog.gemspec /src/
10
+
11
+ ADD . /src/
12
+
13
+ RUN bundle install
14
+ RUN rake build
15
+
16
+ RUN fluent-gem install pkg/*.gem
17
+
18
+ ENTRYPOINT [ "/bin/bash"]
data/README.md CHANGED
@@ -47,15 +47,20 @@ ingest and parse PostgreSQL CSV logs:
47
47
 
48
48
  <filter postgres.postgres_csv>
49
49
  @type postgresql_slowlog
50
+ output_key query
50
51
  </filter>
51
52
 
52
53
  <filter postgres.postgres_csv>
53
54
  @type postgresql_redactor
55
+ input_key query
56
+ output_key sql
57
+ fingerprint_key fingerprint
54
58
  </filter>
55
59
 
56
60
  <filter postgres.postgres_csv>
57
61
  @type marginalia
58
62
  key sql
63
+ strip_comment true
59
64
  </filter>
60
65
 
61
66
  # Output resulting JSON file to a directory in /tmp
data/Rakefile CHANGED
@@ -9,4 +9,10 @@ Rake::TestTask.new(:test) do |test|
9
9
  test.verbose = true
10
10
  end
11
11
 
12
+ Rake::TestTask.new(:itest) do |test|
13
+ test.libs << 'lib' << 'test'
14
+ test.test_files = FileList['test/**/itest_*.rb']
15
+ test.verbose = true
16
+ end
17
+
12
18
  task :default => [:build]
@@ -0,0 +1,19 @@
1
+ # Docker Compose setup useful for testing and development purposes
2
+ version: "3.9"
3
+ services:
4
+ fluentd:
5
+ build: .
6
+ links:
7
+ - postgres
8
+ entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
9
+ postgres:
10
+ image: postgres
11
+ restart: always
12
+ environment:
13
+ - POSTGRES_USER=testuser
14
+ - POSTGRES_PASSWORD=testpass
15
+ ports:
16
+ - '5438:5432'
17
+ command: postgres -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all
18
+ volumes:
19
+ - ./sql/create_extension.sql:/docker-entrypoint-initdb.d/create_extension.sql
@@ -0,0 +1,12 @@
1
+ <source>
2
+ @type pg_stat_statements
3
+ tag postgres.pg_stat_statements
4
+ host postgres
5
+ username testuser
6
+ password testpass
7
+ interval 1
8
+ </source>
9
+
10
+ <match postgres.pg_stat_statements>
11
+ @type stdout
12
+ </match>
@@ -2,10 +2,10 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.0.2'
5
+ s.version = '0.3.1'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
- s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
8
+ s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
9
9
  s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
10
  s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
11
 
@@ -15,7 +15,8 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
- s.add_dependency 'pg_query', '~> 1.3'
18
+ s.add_dependency 'pg', '~> 1.1'
19
+ s.add_dependency 'pg_query', '~> 2.0'
19
20
 
20
21
  s.add_development_dependency 'rake'
21
22
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -16,9 +16,11 @@ module Fluent
16
16
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
17
17
  config_param :key, :string, default: 'sql'
18
18
 
19
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/).*}m.freeze
20
- MARGINALIA_APPENDED_REGEXP = %r{.*(?<comment>/\*.*\*/)$}m.freeze
21
- MARGINALIA_KEY_VALUE_REGEXP = /^(?<key>.*):?(?<value>.*)$/.freeze
19
+ desc 'Whether to strip the comment from the record specified by key'
20
+ config_param :strip_comment, :bool, default: true
21
+
22
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
22
24
 
23
25
  def filter(_tag, _time, record)
24
26
  parse_comments(record)
@@ -38,15 +40,9 @@ module Fluent
38
40
  return unless comment_match
39
41
 
40
42
  entries = extract_entries(comment_match['comment'])
43
+ parse_entries(entries, record)
41
44
 
42
- entries.each do |component|
43
- data = component.split(':', 2)
44
-
45
- break unless data.length == 2
46
-
47
- stored_key = store_key(record, data[0])
48
- record[stored_key] = data[1]
49
- end
45
+ record[@key] = comment_match['sql'].strip if @strip_comment
50
46
  end
51
47
 
52
48
  def match_marginalia_comment(sql)
@@ -73,7 +69,21 @@ module Fluent
73
69
  comment.gsub!(%r{\*/$}, '')
74
70
  end
75
71
 
72
+ def parse_entries(entries, record)
73
+ entries.each do |component|
74
+ data = component.split(':', 2)
75
+
76
+ break unless data.length == 2
77
+
78
+ stored_key = store_key(record, data[0])
79
+ record[stored_key] = data[1]
80
+ end
81
+ end
82
+
76
83
  def store_key(record, component_key)
84
+ # In case there is a conflict with the Marginalia key
85
+ # (e.g. `correlation_id`), we use the base key
86
+ # (`sql_correlation_id`) instead.
77
87
  if record.key?(component_key)
78
88
  "#{@key}_#{component_key}"
79
89
  else
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'fluent/plugin/filter'
2
4
  require 'pg_query'
3
5
 
@@ -5,24 +7,28 @@ module Fluent::Plugin
5
7
  class PostgreSQLRedactor < Filter
6
8
  Fluent::Plugin.register_filter('postgresql_redactor', self)
7
9
 
8
- def configure(conf)
9
- super
10
- end
10
+ desc 'Input field to parse for SQL queries'
11
+ config_param :input_key, :string, default: 'query'
12
+
13
+ desc 'Output field to store SQL queries'
14
+ config_param :output_key, :string, default: 'sql'
15
+
16
+ desc 'Name of field to store SQL query fingerprint'
17
+ config_param :fingerprint_key, :string, default: 'fingerprint'
11
18
 
12
19
  def filter(_tag, _time, record)
13
- statement = record['statement']
20
+ statement = record[@input_key]
14
21
 
15
22
  return record unless statement
16
23
 
17
24
  normalized = PgQuery.normalize(statement)
18
- record.delete('statement')
19
- record['sql'] = normalized
20
- record.delete('message')
25
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
26
+
27
+ record.delete(@input_key)
28
+ record[@output_key] = normalized
21
29
 
22
30
  record
23
31
  rescue PgQuery::ParseError
24
- # pg_query currently only supports PostgresQL 10:
25
- # https://github.com/lfittl/pg_query/issues/184
26
32
  record['pg_query_error'] = true
27
33
  record
28
34
  end
@@ -12,6 +12,9 @@ module Fluent
12
12
  class PostgreSQLSlowLog < Filter
13
13
  Fluent::Plugin.register_filter('postgresql_slowlog', self)
14
14
 
15
+ desc 'Field to output SQL queries'
16
+ config_param :output_key, :string, default: 'query'
17
+
15
18
  SLOWLOG_REGEXP = /^duration: (\d+(?:\.\d+)?) ms .*?:\s*(.*)/m.freeze
16
19
 
17
20
  def filter(_tag, _time, record)
@@ -20,7 +23,8 @@ module Fluent
20
23
  # rubocop:disable Style/PerlBackrefs
21
24
  if record['message'] =~ SLOWLOG_REGEXP
22
25
  record['duration_s'] = $1.to_f / 1000.0
23
- record['statement'] = $2
26
+ record[@output_key] = $2
27
+ record.delete('message')
24
28
  end
25
29
  # rubocop:enable Style/PerlBackrefs
26
30
 
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent::Plugin
8
+ # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
9
+ # for queryid to query mappings. These are then normalized for security purposes,
10
+ # fingerprinted and emitted as records with the following format:
11
+ # {
12
+ # 'fingerprint' => '8a6e9896bd9048a2',
13
+ # 'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
14
+ # 'query_length' => 58,
15
+ # 'queryid' => 3239318621761098074
16
+ # }
17
+ class PgStatStatementsInput < Input
18
+ Fluent::Plugin.register_input('pg_stat_statements', self)
19
+
20
+ desc 'PostgreSQL host'
21
+ config_param :host, :string
22
+
23
+ desc 'RDBMS port (default: 5432)'
24
+ config_param :port, :integer, default: 5432
25
+
26
+ desc 'login user name'
27
+ config_param :username, :string, default: nil
28
+
29
+ desc 'postgres db'
30
+ config_param :dbname, :string, default: nil
31
+
32
+ desc 'login password'
33
+ config_param :password, :string, default: nil, secret: true
34
+
35
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
+ # for options
37
+ desc 'postgres sslmode'
38
+ config_param :sslmode, :string, default: 'prefer'
39
+
40
+ desc 'tag'
41
+ config_param :tag, :string, default: nil
42
+
43
+ desc 'interval in second to run query'
44
+ config_param :interval, :time, default: 300
45
+
46
+ desc 'Name of field to store SQL query fingerprint'
47
+ config_param :fingerprint_key, :string, default: 'fingerprint'
48
+
49
+ def start
50
+ @stop_flag = false
51
+ @thread = Thread.new(&method(:thread_main))
52
+ end
53
+
54
+ def shutdown
55
+ @stop_flag = true
56
+
57
+ # Interrupt thread and wait for it to finish
58
+ Thread.new { @thread.run } if @thread
59
+ @thread.join
60
+ end
61
+
62
+ def thread_main
63
+ until @stop_flag
64
+ sleep @interval
65
+ break if @stop_flag
66
+
67
+ begin
68
+ with_connection do |conn|
69
+ emit_statements_to_stream(conn)
70
+ end
71
+ rescue StandardError => e
72
+ log.error 'unexpected error', error: e.message, error_class: e.class
73
+ log.error_backtrace e.backtrace
74
+ end
75
+ end
76
+ end
77
+
78
+ # Returns a fluentd record for a query row
79
+ def record_for_row(row)
80
+ query = row['query']
81
+
82
+ # We record the query_length as it will help in understanding whether unparseable
83
+ # queries are truncated.
84
+ record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
85
+
86
+ return record unless query
87
+
88
+ normalized = PgQuery.normalize(query)
89
+ record['query'] = normalized
90
+
91
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
92
+
93
+ record
94
+ rescue PgQuery::ParseError
95
+ record['query_unparseable'] = true
96
+
97
+ record
98
+ end
99
+
100
+ private
101
+
102
+ # Query the database and emit statements to fluentd router
103
+ def emit_statements_to_stream(conn)
104
+ me = Fluent::MultiEventStream.new
105
+
106
+ now = Fluent::Engine.now
107
+ conn.exec('SELECT queryid, query FROM pg_stat_statements').each do |row|
108
+ record = record_for_row(row)
109
+ me.add(now, record)
110
+ end
111
+
112
+ @router.emit_stream(@tag, me)
113
+ end
114
+
115
+ # Since this query is very infrequent, and it may be communicating directly
116
+ # with postgres without pgbouncer, don't use a persistent connection and
117
+ # ensure that it is properly closed
118
+ def with_connection(&block)
119
+ conn = PG.connect(
120
+ host: @host,
121
+ dbname: @dbname,
122
+ sslmode: @sslmode,
123
+ user: @username,
124
+ password: @password
125
+ )
126
+ conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
+
128
+ begin
129
+ block.call(conn)
130
+ ensure
131
+ # Always close the connection
132
+ conn.finish
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1 @@
1
+ CREATE EXTENSION pg_stat_statements;
data/test/helper.rb CHANGED
@@ -7,10 +7,13 @@ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
7
  $LOAD_PATH.unshift(__dir__)
8
8
  require 'fluent/test'
9
9
  require 'fluent/test/driver/filter'
10
+ require 'fluent/test/driver/input'
10
11
  require 'fluent/test/helpers'
11
12
 
12
13
  Test::Unit::TestCase.include(Fluent::Test::Helpers)
14
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
13
15
 
14
16
  require 'fluent/plugin/filter_postgresql_slowlog'
15
17
  require 'fluent/plugin/filter_postgresql_redactor'
16
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/in_pg_stat_statements'
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
6
+ # The default values work with the configuration in .gitlab-ci.yml on the postgres service
7
+ # Override with env vars for local development
8
+ HOST = ENV.fetch('PG_TEST_HOST', 'postgres')
9
+ USERNAME = ENV.fetch('PG_TEST_USER', 'testuser')
10
+ PASSWORD = ENV.fetch('PG_TEST_PASSWORD', 'testpass')
11
+
12
+ def setup
13
+ Fluent::Test.setup
14
+
15
+ @conn = PG.connect(
16
+ host: HOST,
17
+ user: USERNAME,
18
+ password: PASSWORD
19
+ )
20
+
21
+ try_setup_extension
22
+ create_known_statement
23
+ end
24
+
25
+ def teardown
26
+ @conn&.finish
27
+ end
28
+
29
+ # Setup pg_stat_statements extension
30
+ def try_setup_extension
31
+ @conn.exec('CREATE EXTENSION pg_stat_statements')
32
+ rescue PG::DuplicateObject
33
+ end
34
+
35
+ # This statement gives us something to look for in the emitted stream
36
+ def create_known_statement
37
+ @conn.exec('SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT 1')
38
+ end
39
+
40
+ VALID_CONFIG = %(
41
+ tag postgres.pg_stat_statements
42
+ host #{HOST}
43
+ username #{USERNAME}
44
+ password #{PASSWORD}
45
+ interval 1
46
+ )
47
+
48
+ INVALID_CONFIG = %(
49
+ host 'invalid_host.dne'
50
+ port 1234
51
+ username #{USERNAME}
52
+ password #{PASSWORD}
53
+ interval 1
54
+ )
55
+
56
+ def create_driver(config)
57
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatStatementsInput).configure(config)
58
+ end
59
+
60
+ sub_test_case 'configuration' do
61
+ test 'connects' do
62
+ d = create_driver(VALID_CONFIG)
63
+
64
+ emits = []
65
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
66
+ d.run(num_waits = 50) do
67
+ emits = d.emits
68
+ end
69
+
70
+ assert_false emits.empty?
71
+ end
72
+
73
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
74
+ # fluentd configuration to fail. We would rather retry until we get a connection
75
+ test 'connects for an invalid config' do
76
+ d = create_driver(INVALID_CONFIG)
77
+
78
+ emits = []
79
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
80
+ d.run(num_waits = 50) do
81
+ emits = d.emits
82
+ end
83
+
84
+ assert_true emits.empty?
85
+ end
86
+ end
87
+
88
+ sub_test_case 'execution' do
89
+ test 'connects' do
90
+ d = create_driver(VALID_CONFIG)
91
+
92
+ emits = []
93
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
94
+ d.run(num_waits = 50) do
95
+ emits = d.emits
96
+ end
97
+
98
+ expected_record = {
99
+ 'fingerprint' => '8a6e9896bd9048a2',
100
+ 'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
101
+ 'query_length' => 58,
102
+ 'queryid' => 3_239_318_621_761_098_074
103
+ }
104
+ known_statement_event = emits.find do |event|
105
+ record = event[2]
106
+ record['query'] == expected_record['query']
107
+ end
108
+
109
+ assert_false known_statement_event.nil?
110
+
111
+ tag = known_statement_event[0]
112
+ record = known_statement_event[2]
113
+
114
+ assert_equal 'postgres.pg_stat_statements', tag
115
+ assert_equal expected_record['fingerprint'], record['fingerprint']
116
+ assert_equal expected_record['query_length'], record['query_length']
117
+ assert_true expected_record.include? 'queryid'
118
+ end
119
+ end
120
+ end
@@ -22,15 +22,16 @@ class Marginalia < Test::Unit::TestCase
22
22
  inputs = [
23
23
  { 'statement' => 'SELECT * FROM projects' },
24
24
  { 'statement' => 'SELECT COUNT(*) FROM "projects" /* this is just a comment */' },
25
- { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' }
25
+ { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' },
26
+ { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/' }
26
27
  ]
27
28
 
28
29
  d.run(default_tag: @tag) do
29
30
  inputs.each { |input| d.feed(input) }
30
31
  end
31
32
 
32
- assert_equal(inputs[0].merge, d.filtered[0].last)
33
- assert_equal(inputs[1].merge, d.filtered[1].last)
33
+ assert_equal(inputs[0], d.filtered[0].last)
34
+ assert_equal(inputs[1], d.filtered[1].last)
34
35
  assert_equal(inputs[2].merge(
35
36
  {
36
37
  'application' => 'sidekiq',
@@ -40,6 +41,19 @@ class Marginalia < Test::Unit::TestCase
40
41
  }
41
42
  ),
42
43
  d.filtered[2].last)
44
+ assert_equal(inputs[3].merge(
45
+ {
46
+ 'application' => 'web',
47
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
48
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
49
+ }
50
+ ),
51
+ d.filtered[3].last)
52
+
53
+ assert_equal('SELECT * FROM projects', d.filtered[0].last['statement'])
54
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
55
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
56
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[3].last['statement'])
43
57
  end
44
58
 
45
59
  test 'parses prepended Marginalia comments' do
@@ -48,6 +62,7 @@ class Marginalia < Test::Unit::TestCase
48
62
  inputs = [
49
63
  { 'statement' => '/* this is just a comment */ SELECT COUNT(*) FROM "projects"' },
50
64
  { 'statement' => '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"' },
65
+ { 'statement' => '/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT COUNT(*) FROM "projects"' },
51
66
  { 'statement' => '/*application:sidekiq*/ SELECT COUNT(*) FROM "projects"',
52
67
  'application' => 'test-conflict' }
53
68
  ]
@@ -56,7 +71,7 @@ class Marginalia < Test::Unit::TestCase
56
71
  inputs.each { |input| d.feed(input) }
57
72
  end
58
73
 
59
- assert_equal(inputs[0].merge, d.filtered[0].last)
74
+ assert_equal(inputs[0], d.filtered[0].last)
60
75
  assert_equal(inputs[1].merge(
61
76
  {
62
77
  'application' => 'sidekiq',
@@ -68,9 +83,45 @@ class Marginalia < Test::Unit::TestCase
68
83
  d.filtered[1].last)
69
84
  assert_equal(inputs[2].merge(
70
85
  {
71
- 'statement_application' => 'sidekiq'
86
+ 'application' => 'web',
87
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
88
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
72
89
  }
73
90
  ),
74
91
  d.filtered[2].last)
92
+ assert_equal(inputs[3].merge(
93
+ {
94
+ 'statement_application' => 'sidekiq'
95
+ }
96
+ ),
97
+ d.filtered[3].last)
98
+
99
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[0].last['statement'])
100
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
101
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
102
+ end
103
+
104
+ test 'parses Marginalia comments with strip_comment disabled' do
105
+ d = create_driver(
106
+ <<~CONF
107
+ strip_comment false
108
+ key sql
109
+ CONF
110
+ )
111
+
112
+ sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/)
113
+ appended_sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/')
114
+
115
+ inputs = [
116
+ { 'sql' => sql },
117
+ { 'sql' => appended_sql }
118
+ ]
119
+
120
+ d.run(default_tag: @tag) do
121
+ inputs.each { |input| d.feed(input) }
122
+ end
123
+
124
+ assert_equal(sql, d.filtered[0].last['sql'])
125
+ assert_equal(appended_sql, d.filtered[1].last['sql'])
75
126
  end
76
127
  end
@@ -8,11 +8,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
8
8
  @tag = 'test.tag'
9
9
  end
10
10
 
11
- CONFIG = '
12
- <filter test.tag>
13
- @type postgresql_redactor
14
- </filter>
15
- '
11
+ CONFIG = ''
16
12
 
17
13
  def create_driver(conf = CONFIG)
18
14
  Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLRedactor).configure(conf)
@@ -23,7 +19,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
23
19
 
24
20
  inputs = [
25
21
  { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
26
- 'statement' => %(SELECT * FROM projects WHERE id = 1),
22
+ 'query' => %(SELECT * FROM projects WHERE id = 1),
27
23
  'duration_s' => 2.3571 }
28
24
  ]
29
25
 
@@ -31,20 +27,43 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
31
27
  inputs.each { |input| d.feed(input) }
32
28
  end
33
29
 
34
- assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
30
+ assert_equal(%w[duration_s fingerprint message sql], d.filtered[0].last.keys.sort)
35
31
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
36
32
  end
37
33
 
38
34
  test 'handles parse errors' do
39
35
  d = create_driver
40
36
 
41
- input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
37
+ input = { 'query' => 'create index something test (bla) include (bar)' }
42
38
 
43
39
  d.run(default_tag: @tag) do
44
40
  d.feed(input)
45
41
  end
46
42
 
47
- assert_equal(%w[pg_query_error statement], d.filtered[0].last.keys.sort)
48
- assert_equal(input['statement'], d.filtered[0].last['statement'])
43
+ assert_equal(%w[pg_query_error query], d.filtered[0].last.keys.sort)
44
+ assert_equal(input['query'], d.filtered[0].last['query'])
45
+ end
46
+
47
+ test 'uses configured input and output keys' do
48
+ d = create_driver(<<~CONF
49
+ input_key sql
50
+ output_key out_sql
51
+ CONF
52
+ )
53
+
54
+ inputs = [
55
+ {
56
+ 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
57
+ 'sql' => %(SELECT * FROM projects WHERE id = 1),
58
+ 'duration_s' => 2.3571
59
+ }
60
+ ]
61
+
62
+ d.run(default_tag: @tag) do
63
+ inputs.each { |input| d.feed(input) }
64
+ end
65
+
66
+ assert_equal(%w[duration_s fingerprint message out_sql], d.filtered[0].last.keys.sort)
67
+ assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['out_sql'])
49
68
  end
50
69
  end
@@ -32,18 +32,21 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase
32
32
 
33
33
  assert_equal(inputs[0].merge(
34
34
  {
35
- 'statement' => 'SELECT * FROM projects',
35
+ 'query' => 'SELECT * FROM projects',
36
36
  'duration_s' => 2.3571
37
37
  }
38
38
  ),
39
39
  d.filtered[0].last)
40
40
  assert_equal(inputs[1].merge(
41
41
  {
42
- 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
42
+ 'query' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
43
43
  'duration_s' => 1.873345
44
44
  }
45
45
  ),
46
46
  d.filtered[1].last)
47
+
48
+ assert_equal(%w[duration_s query], d.filtered[0].last.keys.sort)
49
+ assert_equal(%w[duration_s query], d.filtered[1].last.keys.sort)
47
50
  end
48
51
 
49
52
  test 'ignores messages not having to do with slow logs' do
@@ -55,5 +58,32 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase
55
58
  end
56
59
 
57
60
  assert_equal(input, d.filtered[0].last)
61
+ assert_equal(%w[message], d.filtered[0].last.keys.sort)
62
+ end
63
+
64
+ test 'outputs slow log entries to configured output key' do
65
+ d = create_driver(
66
+ <<~CONF
67
+ output_key my_key
68
+ CONF
69
+ )
70
+
71
+ inputs = [
72
+ { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects' }
73
+ ]
74
+
75
+ d.run(default_tag: @tag) do
76
+ inputs.each { |input| d.feed(input) }
77
+ end
78
+
79
+ assert_equal(inputs[0].merge(
80
+ {
81
+ 'my_key' => 'SELECT * FROM projects',
82
+ 'duration_s' => 2.3571
83
+ }
84
+ ),
85
+ d.filtered[0].last)
86
+
87
+ assert_equal(%w[duration_s my_key], d.filtered[0].last.keys.sort)
58
88
  end
59
89
  end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ end
9
+
10
+ CONFIG = %(
11
+ tag postgres.pg_stat_statements
12
+ host localhost
13
+ port 1234
14
+ dbname gitlab
15
+ sslmode require
16
+ username moo
17
+ password secret
18
+ interval 600
19
+ fingerprint_key fingerprint
20
+ )
21
+
22
+ def create_driver
23
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatStatementsInput).configure(CONFIG)
24
+ end
25
+
26
+ sub_test_case 'configuration' do
27
+ test 'basic configuration' do
28
+ d = create_driver
29
+
30
+ assert_equal 'postgres.pg_stat_statements', d.instance.tag
31
+ assert_equal 'localhost', d.instance.host
32
+ assert_equal 1234, d.instance.port
33
+ assert_equal 'gitlab', d.instance.dbname
34
+ assert_equal 'require', d.instance.sslmode
35
+ assert_equal 'moo', d.instance.username
36
+ assert_equal 'secret', d.instance.password
37
+ assert_equal 600, d.instance.interval
38
+ assert_equal 'fingerprint', d.instance.fingerprint_key
39
+ end
40
+ end
41
+
42
+ sub_test_case 'execution' do
43
+ test 'sql' do
44
+ d = create_driver
45
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
46
+
47
+ expected = {
48
+ 'fingerprint' => 'c071dee80d466e7d',
49
+ 'query' => 'SELECT * FROM users WHERE user_id = ?',
50
+ 'query_length' => 37,
51
+ 'queryid' => '1234'
52
+ }
53
+
54
+ assert_equal expected, record
55
+ end
56
+
57
+ test 'nil query' do
58
+ d = create_driver
59
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
60
+
61
+ expected = { 'query_length' => nil, 'queryid' => '1234' }
62
+ assert_equal expected, record
63
+ end
64
+
65
+ test 'ddl query' do
66
+ d = create_driver
67
+ ddl_sql = <<-SQL
68
+ CREATE TABLE accounts (
69
+ user_id serial PRIMARY KEY,
70
+ username VARCHAR(50) UNIQUE NOT NULL,
71
+ password VARCHAR(50) NOT NULL,
72
+ email VARCHAR(255) UNIQUE NOT NULL,
73
+ created_on TIMESTAMP NOT NULL,
74
+ last_login TIMESTAMP
75
+ )
76
+ SQL
77
+
78
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
79
+
80
+ expected = {
81
+ 'fingerprint' => 'fa9c9d26757c4f9b',
82
+ 'query' => ddl_sql,
83
+ 'query_length' => 287,
84
+ 'queryid' => '1234'
85
+ }
86
+ assert_equal expected, record
87
+ end
88
+
89
+ test 'set command' do
90
+ d = create_driver
91
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
92
+
93
+ expected = {
94
+ 'fingerprint' => '23f8d6eb1d3125c3',
95
+ 'query' => 'SET TIME ZONE $1',
96
+ 'query_length' => 23,
97
+ 'queryid' => '1234'
98
+ }
99
+
100
+ assert_equal expected, record
101
+ end
102
+
103
+ test 'unparseable sql' do
104
+ d = create_driver
105
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
106
+
107
+ expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
108
+ assert_equal expected, record
109
+ end
110
+ end
111
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-20 00:00:00.000000000 Z
11
+ date: 2021-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -30,20 +30,34 @@ dependencies:
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
33
47
  - !ruby/object:Gem::Dependency
34
48
  name: pg_query
35
49
  requirement: !ruby/object:Gem::Requirement
36
50
  requirements:
37
51
  - - "~>"
38
52
  - !ruby/object:Gem::Version
39
- version: '1.3'
53
+ version: '2.0'
40
54
  type: :runtime
41
55
  prerelease: false
42
56
  version_requirements: !ruby/object:Gem::Requirement
43
57
  requirements:
44
58
  - - "~>"
45
59
  - !ruby/object:Gem::Version
46
- version: '1.3'
60
+ version: '2.0'
47
61
  - !ruby/object:Gem::Dependency
48
62
  name: rake
49
63
  requirement: !ruby/object:Gem::Requirement
@@ -80,21 +94,27 @@ extensions: []
80
94
  extra_rdoc_files: []
81
95
  files:
82
96
  - ".gitlab-ci.yml"
97
+ - Dockerfile
83
98
  - Gemfile
84
- - Gemfile.lock
85
99
  - LICENSE
86
100
  - README.md
87
101
  - Rakefile
102
+ - docker-compose.yml
103
+ - example-fluentd.conf
88
104
  - fluent-plugin-postgresql-csvlog.gemspec
89
105
  - lib/fluent/plugin/filter_marginalia.rb
90
106
  - lib/fluent/plugin/filter_postgresql_redactor.rb
91
107
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
108
+ - lib/fluent/plugin/in_pg_stat_statements.rb
92
109
  - lib/fluent/plugin/parser_multiline_csv.rb
110
+ - sql/create_extension.sql
93
111
  - test/helper.rb
112
+ - test/plugin/itest_in_pg_stat_statements.rb
94
113
  - test/plugin/test_filter_marginalia.rb
95
114
  - test/plugin/test_filter_postgresql_redactor.rb
96
115
  - test/plugin/test_filter_postgresql_slowlog.rb
97
- homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
116
+ - test/plugin/test_in_pg_stat_statements.rb
117
+ homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
98
118
  licenses: []
99
119
  metadata: {}
100
120
  post_install_message:
@@ -118,6 +138,8 @@ specification_version: 4
118
138
  summary: fluentd plugins to work with PostgreSQL CSV logs
119
139
  test_files:
120
140
  - test/helper.rb
141
+ - test/plugin/itest_in_pg_stat_statements.rb
121
142
  - test/plugin/test_filter_marginalia.rb
122
143
  - test/plugin/test_filter_postgresql_redactor.rb
123
144
  - test/plugin/test_filter_postgresql_slowlog.rb
145
+ - test/plugin/test_in_pg_stat_statements.rb
data/Gemfile.lock DELETED
@@ -1,50 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- fluent-plugin-postgresql-csvlog (0.0.2)
5
- fluentd (>= 1.0, < 2)
6
- pg_query (~> 1.3)
7
-
8
- GEM
9
- remote: https://rubygems.org/
10
- specs:
11
- concurrent-ruby (1.1.8)
12
- cool.io (1.7.0)
13
- fluentd (1.12.0)
14
- bundler
15
- cool.io (>= 1.4.5, < 2.0.0)
16
- http_parser.rb (>= 0.5.1, < 0.7.0)
17
- msgpack (>= 1.3.1, < 2.0.0)
18
- serverengine (>= 2.2.2, < 3.0.0)
19
- sigdump (~> 0.2.2)
20
- strptime (>= 0.2.2, < 1.0.0)
21
- tzinfo (>= 1.0, < 3.0)
22
- tzinfo-data (~> 1.0)
23
- yajl-ruby (~> 1.0)
24
- http_parser.rb (0.6.0)
25
- msgpack (1.4.2)
26
- pg_query (1.3.0)
27
- power_assert (2.0.0)
28
- rake (13.0.3)
29
- serverengine (2.2.2)
30
- sigdump (~> 0.2.2)
31
- sigdump (0.2.4)
32
- strptime (0.2.5)
33
- test-unit (3.4.0)
34
- power_assert
35
- tzinfo (2.0.4)
36
- concurrent-ruby (~> 1.0)
37
- tzinfo-data (1.2021.1)
38
- tzinfo (>= 1.0.0)
39
- yajl-ruby (1.4.1)
40
-
41
- PLATFORMS
42
- ruby
43
-
44
- DEPENDENCIES
45
- fluent-plugin-postgresql-csvlog!
46
- rake
47
- test-unit (~> 3.2)
48
-
49
- BUNDLED WITH
50
- 2.1.4