fluent-plugin-postgresql-csvlog 0.0.2 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 002b7169e3f3ac6493eb09dca5c2e3820a944f38c64a0b45641f3cdbd1717ddf
4
- data.tar.gz: eac3646a2404665924c4fa30114b91ecce49dbadef3919d89eab458450568328
3
+ metadata.gz: 81cfb53854390b0aae43fd6f8cfaf05e11c4251a4442bf40ccdee016db11b3f6
4
+ data.tar.gz: 0e705c2af91af84e400e96d41b5275f0d01f4d6dc4ec0a37b3bfed0974e8a424
5
5
  SHA512:
6
- metadata.gz: 354478f3573f0934dcee72305069aa8f25333087d1667fc4e127963b8f8f955b66a02462925e4672f27f80e43b772640a68a4760266f461704bd9486ad66a3ef
7
- data.tar.gz: f6d89c2db73d337b1aa52e9838ab08f6f3bda80e7053307482fa5b284b4b6f81ae0389e2a20411fbcc383c878b5309a320946d5849d43fa466e469037b9d5102
6
+ metadata.gz: 24bb505d30f06847e16b2f631e1e2ce011559edcc11724556d62abc0089ee99b00e5b911c1f23d87740720fd4d57235af1ee104991cbff72ac8ba790c700280a
7
+ data.tar.gz: d42ae8976276b4392c1963d6c83e7691732f7eec8dbf87eaf270e3b4547ceba8f9f2b76056006163d6b594e0791b4a9b49183c6ae1793610e7f6555a92b9d190
data/.gitlab-ci.yml CHANGED
@@ -2,9 +2,28 @@ image: "ruby:2.7"
2
2
 
3
3
  test:
4
4
  before_script:
5
+ - bundle config set path vendor
5
6
  - bundle install --jobs $(nproc)
6
7
  script:
7
8
  - bundle exec rake test
8
9
  cache:
9
10
  paths:
10
11
  - vendor/ruby
12
+
13
+ # integration tests
14
+ itest:
15
+ services:
16
+ - name: postgres:12
17
+ alias: postgres
18
+ command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
19
+ variables:
20
+ POSTGRES_USER: testuser
21
+ POSTGRES_PASSWORD: testpass
22
+ before_script:
23
+ - bundle config set path vendor
24
+ - bundle install --jobs $(nproc)
25
+ script:
26
+ - bundle exec rake itest
27
+ cache:
28
+ paths:
29
+ - vendor/ruby
data/Dockerfile ADDED
@@ -0,0 +1,18 @@
1
+ # Dockerfile useful for manual testing purposes
2
+ FROM fluent/fluentd:v1.13-1
3
+ USER root
4
+ RUN apk add bash alpine-sdk postgresql-dev postgresql-client ruby-dev
5
+
6
+ WORKDIR /src/
7
+
8
+ ADD Gemfile /src/
9
+ ADD fluent-plugin-postgresql-csvlog.gemspec /src/
10
+
11
+ ADD . /src/
12
+
13
+ RUN bundle install
14
+ RUN rake build
15
+
16
+ RUN fluent-gem install pkg/*.gem
17
+
18
+ ENTRYPOINT [ "/bin/bash"]
data/README.md CHANGED
@@ -47,15 +47,20 @@ ingest and parse PostgreSQL CSV logs:
47
47
 
48
48
  <filter postgres.postgres_csv>
49
49
  @type postgresql_slowlog
50
+ output_key query
50
51
  </filter>
51
52
 
52
53
  <filter postgres.postgres_csv>
53
54
  @type postgresql_redactor
55
+ input_key query
56
+ output_key sql
57
+ fingerprint_key fingerprint
54
58
  </filter>
55
59
 
56
60
  <filter postgres.postgres_csv>
57
61
  @type marginalia
58
62
  key sql
63
+ strip_comment true
59
64
  </filter>
60
65
 
61
66
  # Output resulting JSON file to a directory in /tmp
data/Rakefile CHANGED
@@ -9,4 +9,10 @@ Rake::TestTask.new(:test) do |test|
9
9
  test.verbose = true
10
10
  end
11
11
 
12
+ Rake::TestTask.new(:itest) do |test|
13
+ test.libs << 'lib' << 'test'
14
+ test.test_files = FileList['test/**/itest_*.rb']
15
+ test.verbose = true
16
+ end
17
+
12
18
  task :default => [:build]
@@ -0,0 +1,19 @@
1
+ # Docker Compose setup useful for testing and development purposes
2
+ version: "3.9"
3
+ services:
4
+ fluentd:
5
+ build: .
6
+ links:
7
+ - postgres
8
+ entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
9
+ postgres:
10
+ image: postgres
11
+ restart: always
12
+ environment:
13
+ - POSTGRES_USER=testuser
14
+ - POSTGRES_PASSWORD=testpass
15
+ ports:
16
+ - '5438:5432'
17
+ command: postgres -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all
18
+ volumes:
19
+ - ./sql/create_extension.sql:/docker-entrypoint-initdb.d/create_extension.sql
@@ -0,0 +1,12 @@
1
+ <source>
2
+ @type pg_stat_statements
3
+ tag postgres.pg_stat_statements
4
+ host postgres
5
+ username testuser
6
+ password testpass
7
+ interval 1
8
+ </source>
9
+
10
+ <match postgres.pg_stat_statements>
11
+ @type stdout
12
+ </match>
@@ -2,10 +2,10 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.0.2'
5
+ s.version = '0.3.1'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
- s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
8
+ s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
9
9
  s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
10
  s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
11
 
@@ -15,7 +15,8 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
- s.add_dependency 'pg_query', '~> 1.3'
18
+ s.add_dependency 'pg', '~> 1.1'
19
+ s.add_dependency 'pg_query', '~> 2.0'
19
20
 
20
21
  s.add_development_dependency 'rake'
21
22
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -16,9 +16,11 @@ module Fluent
16
16
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
17
17
  config_param :key, :string, default: 'sql'
18
18
 
19
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/).*}m.freeze
20
- MARGINALIA_APPENDED_REGEXP = %r{.*(?<comment>/\*.*\*/)$}m.freeze
21
- MARGINALIA_KEY_VALUE_REGEXP = /^(?<key>.*):?(?<value>.*)$/.freeze
19
+ desc 'Whether to strip the comment from the record specified by key'
20
+ config_param :strip_comment, :bool, default: true
21
+
22
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
22
24
 
23
25
  def filter(_tag, _time, record)
24
26
  parse_comments(record)
@@ -38,15 +40,9 @@ module Fluent
38
40
  return unless comment_match
39
41
 
40
42
  entries = extract_entries(comment_match['comment'])
43
+ parse_entries(entries, record)
41
44
 
42
- entries.each do |component|
43
- data = component.split(':', 2)
44
-
45
- break unless data.length == 2
46
-
47
- stored_key = store_key(record, data[0])
48
- record[stored_key] = data[1]
49
- end
45
+ record[@key] = comment_match['sql'].strip if @strip_comment
50
46
  end
51
47
 
52
48
  def match_marginalia_comment(sql)
@@ -73,7 +69,21 @@ module Fluent
73
69
  comment.gsub!(%r{\*/$}, '')
74
70
  end
75
71
 
72
+ def parse_entries(entries, record)
73
+ entries.each do |component|
74
+ data = component.split(':', 2)
75
+
76
+ break unless data.length == 2
77
+
78
+ stored_key = store_key(record, data[0])
79
+ record[stored_key] = data[1]
80
+ end
81
+ end
82
+
76
83
  def store_key(record, component_key)
84
+ # In case there is a conflict with the Marginalia key
85
+ # (e.g. `correlation_id`), we use the base key
86
+ # (`sql_correlation_id`) instead.
77
87
  if record.key?(component_key)
78
88
  "#{@key}_#{component_key}"
79
89
  else
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'fluent/plugin/filter'
2
4
  require 'pg_query'
3
5
 
@@ -5,24 +7,28 @@ module Fluent::Plugin
5
7
  class PostgreSQLRedactor < Filter
6
8
  Fluent::Plugin.register_filter('postgresql_redactor', self)
7
9
 
8
- def configure(conf)
9
- super
10
- end
10
+ desc 'Input field to parse for SQL queries'
11
+ config_param :input_key, :string, default: 'query'
12
+
13
+ desc 'Output field to store SQL queries'
14
+ config_param :output_key, :string, default: 'sql'
15
+
16
+ desc 'Name of field to store SQL query fingerprint'
17
+ config_param :fingerprint_key, :string, default: 'fingerprint'
11
18
 
12
19
  def filter(_tag, _time, record)
13
- statement = record['statement']
20
+ statement = record[@input_key]
14
21
 
15
22
  return record unless statement
16
23
 
17
24
  normalized = PgQuery.normalize(statement)
18
- record.delete('statement')
19
- record['sql'] = normalized
20
- record.delete('message')
25
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
26
+
27
+ record.delete(@input_key)
28
+ record[@output_key] = normalized
21
29
 
22
30
  record
23
31
  rescue PgQuery::ParseError
24
- # pg_query currently only supports PostgresQL 10:
25
- # https://github.com/lfittl/pg_query/issues/184
26
32
  record['pg_query_error'] = true
27
33
  record
28
34
  end
@@ -12,6 +12,9 @@ module Fluent
12
12
  class PostgreSQLSlowLog < Filter
13
13
  Fluent::Plugin.register_filter('postgresql_slowlog', self)
14
14
 
15
+ desc 'Field to output SQL queries'
16
+ config_param :output_key, :string, default: 'query'
17
+
15
18
  SLOWLOG_REGEXP = /^duration: (\d+(?:\.\d+)?) ms .*?:\s*(.*)/m.freeze
16
19
 
17
20
  def filter(_tag, _time, record)
@@ -20,7 +23,8 @@ module Fluent
20
23
  # rubocop:disable Style/PerlBackrefs
21
24
  if record['message'] =~ SLOWLOG_REGEXP
22
25
  record['duration_s'] = $1.to_f / 1000.0
23
- record['statement'] = $2
26
+ record[@output_key] = $2
27
+ record.delete('message')
24
28
  end
25
29
  # rubocop:enable Style/PerlBackrefs
26
30
 
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent::Plugin
8
+ # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
9
+ # for queryid to query mappings. These are then normalized for security purposes,
10
+ # fingerprinted and emitted as records with the following format:
11
+ # {
12
+ # 'fingerprint' => '8a6e9896bd9048a2',
13
+ # 'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
14
+ # 'query_length' => 58,
15
+ # 'queryid' => 3239318621761098074
16
+ # }
17
+ class PgStatStatementsInput < Input
18
+ Fluent::Plugin.register_input('pg_stat_statements', self)
19
+
20
+ desc 'PostgreSQL host'
21
+ config_param :host, :string
22
+
23
+ desc 'RDBMS port (default: 5432)'
24
+ config_param :port, :integer, default: 5432
25
+
26
+ desc 'login user name'
27
+ config_param :username, :string, default: nil
28
+
29
+ desc 'postgres db'
30
+ config_param :dbname, :string, default: nil
31
+
32
+ desc 'login password'
33
+ config_param :password, :string, default: nil, secret: true
34
+
35
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
+ # for options
37
+ desc 'postgres sslmode'
38
+ config_param :sslmode, :string, default: 'prefer'
39
+
40
+ desc 'tag'
41
+ config_param :tag, :string, default: nil
42
+
43
+ desc 'interval in second to run query'
44
+ config_param :interval, :time, default: 300
45
+
46
+ desc 'Name of field to store SQL query fingerprint'
47
+ config_param :fingerprint_key, :string, default: 'fingerprint'
48
+
49
+ def start
50
+ @stop_flag = false
51
+ @thread = Thread.new(&method(:thread_main))
52
+ end
53
+
54
+ def shutdown
55
+ @stop_flag = true
56
+
57
+ # Interrupt thread and wait for it to finish
58
+ Thread.new { @thread.run } if @thread
59
+ @thread.join
60
+ end
61
+
62
+ def thread_main
63
+ until @stop_flag
64
+ sleep @interval
65
+ break if @stop_flag
66
+
67
+ begin
68
+ with_connection do |conn|
69
+ emit_statements_to_stream(conn)
70
+ end
71
+ rescue StandardError => e
72
+ log.error 'unexpected error', error: e.message, error_class: e.class
73
+ log.error_backtrace e.backtrace
74
+ end
75
+ end
76
+ end
77
+
78
+ # Returns a fluentd record for a query row
79
+ def record_for_row(row)
80
+ query = row['query']
81
+
82
+ # We record the query_length as it will help in understanding whether unparseable
83
+ # queries are truncated.
84
+ record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
85
+
86
+ return record unless query
87
+
88
+ normalized = PgQuery.normalize(query)
89
+ record['query'] = normalized
90
+
91
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
92
+
93
+ record
94
+ rescue PgQuery::ParseError
95
+ record['query_unparseable'] = true
96
+
97
+ record
98
+ end
99
+
100
+ private
101
+
102
+ # Query the database and emit statements to fluentd router
103
+ def emit_statements_to_stream(conn)
104
+ me = Fluent::MultiEventStream.new
105
+
106
+ now = Fluent::Engine.now
107
+ conn.exec('SELECT queryid, query FROM pg_stat_statements').each do |row|
108
+ record = record_for_row(row)
109
+ me.add(now, record)
110
+ end
111
+
112
+ @router.emit_stream(@tag, me)
113
+ end
114
+
115
+ # Since this query is very infrequent, and it may be communicating directly
116
+ # with postgres without pgbouncer, don't use a persistent connection and
117
+ # ensure that it is properly closed
118
+ def with_connection(&block)
119
+ conn = PG.connect(
120
+ host: @host,
121
+ dbname: @dbname,
122
+ sslmode: @sslmode,
123
+ user: @username,
124
+ password: @password
125
+ )
126
+ conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
+
128
+ begin
129
+ block.call(conn)
130
+ ensure
131
+ # Always close the connection
132
+ conn.finish
133
+ end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1 @@
1
+ CREATE EXTENSION pg_stat_statements;
data/test/helper.rb CHANGED
@@ -7,10 +7,13 @@ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
7
  $LOAD_PATH.unshift(__dir__)
8
8
  require 'fluent/test'
9
9
  require 'fluent/test/driver/filter'
10
+ require 'fluent/test/driver/input'
10
11
  require 'fluent/test/helpers'
11
12
 
12
13
  Test::Unit::TestCase.include(Fluent::Test::Helpers)
14
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
13
15
 
14
16
  require 'fluent/plugin/filter_postgresql_slowlog'
15
17
  require 'fluent/plugin/filter_postgresql_redactor'
16
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/in_pg_stat_statements'
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
6
+ # The default values work with the configuration in .gitlab-ci.yml on the postgres service
7
+ # Override with env vars for local development
8
+ HOST = ENV.fetch('PG_TEST_HOST', 'postgres')
9
+ USERNAME = ENV.fetch('PG_TEST_USER', 'testuser')
10
+ PASSWORD = ENV.fetch('PG_TEST_PASSWORD', 'testpass')
11
+
12
+ def setup
13
+ Fluent::Test.setup
14
+
15
+ @conn = PG.connect(
16
+ host: HOST,
17
+ user: USERNAME,
18
+ password: PASSWORD
19
+ )
20
+
21
+ try_setup_extension
22
+ create_known_statement
23
+ end
24
+
25
+ def teardown
26
+ @conn&.finish
27
+ end
28
+
29
+ # Setup pg_stat_statements extension
30
+ def try_setup_extension
31
+ @conn.exec('CREATE EXTENSION pg_stat_statements')
32
+ rescue PG::DuplicateObject
33
+ end
34
+
35
+ # This statement gives us something to look for in the emitted stream
36
+ def create_known_statement
37
+ @conn.exec('SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT 1')
38
+ end
39
+
40
+ VALID_CONFIG = %(
41
+ tag postgres.pg_stat_statements
42
+ host #{HOST}
43
+ username #{USERNAME}
44
+ password #{PASSWORD}
45
+ interval 1
46
+ )
47
+
48
+ INVALID_CONFIG = %(
49
+ host 'invalid_host.dne'
50
+ port 1234
51
+ username #{USERNAME}
52
+ password #{PASSWORD}
53
+ interval 1
54
+ )
55
+
56
+ def create_driver(config)
57
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatStatementsInput).configure(config)
58
+ end
59
+
60
+ sub_test_case 'configuration' do
61
+ test 'connects' do
62
+ d = create_driver(VALID_CONFIG)
63
+
64
+ emits = []
65
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
66
+ d.run(num_waits = 50) do
67
+ emits = d.emits
68
+ end
69
+
70
+ assert_false emits.empty?
71
+ end
72
+
73
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
74
+ # fluentd configuration to fail. We would rather retry until we get a connection
75
+ test 'connects for an invalid config' do
76
+ d = create_driver(INVALID_CONFIG)
77
+
78
+ emits = []
79
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
80
+ d.run(num_waits = 50) do
81
+ emits = d.emits
82
+ end
83
+
84
+ assert_true emits.empty?
85
+ end
86
+ end
87
+
88
+ sub_test_case 'execution' do
89
+ test 'connects' do
90
+ d = create_driver(VALID_CONFIG)
91
+
92
+ emits = []
93
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
94
+ d.run(num_waits = 50) do
95
+ emits = d.emits
96
+ end
97
+
98
+ expected_record = {
99
+ 'fingerprint' => '8a6e9896bd9048a2',
100
+ 'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
101
+ 'query_length' => 58,
102
+ 'queryid' => 3_239_318_621_761_098_074
103
+ }
104
+ known_statement_event = emits.find do |event|
105
+ record = event[2]
106
+ record['query'] == expected_record['query']
107
+ end
108
+
109
+ assert_false known_statement_event.nil?
110
+
111
+ tag = known_statement_event[0]
112
+ record = known_statement_event[2]
113
+
114
+ assert_equal 'postgres.pg_stat_statements', tag
115
+ assert_equal expected_record['fingerprint'], record['fingerprint']
116
+ assert_equal expected_record['query_length'], record['query_length']
117
+ assert_true expected_record.include? 'queryid'
118
+ end
119
+ end
120
+ end
@@ -22,15 +22,16 @@ class Marginalia < Test::Unit::TestCase
22
22
  inputs = [
23
23
  { 'statement' => 'SELECT * FROM projects' },
24
24
  { 'statement' => 'SELECT COUNT(*) FROM "projects" /* this is just a comment */' },
25
- { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' }
25
+ { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' },
26
+ { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/' }
26
27
  ]
27
28
 
28
29
  d.run(default_tag: @tag) do
29
30
  inputs.each { |input| d.feed(input) }
30
31
  end
31
32
 
32
- assert_equal(inputs[0].merge, d.filtered[0].last)
33
- assert_equal(inputs[1].merge, d.filtered[1].last)
33
+ assert_equal(inputs[0], d.filtered[0].last)
34
+ assert_equal(inputs[1], d.filtered[1].last)
34
35
  assert_equal(inputs[2].merge(
35
36
  {
36
37
  'application' => 'sidekiq',
@@ -40,6 +41,19 @@ class Marginalia < Test::Unit::TestCase
40
41
  }
41
42
  ),
42
43
  d.filtered[2].last)
44
+ assert_equal(inputs[3].merge(
45
+ {
46
+ 'application' => 'web',
47
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
48
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
49
+ }
50
+ ),
51
+ d.filtered[3].last)
52
+
53
+ assert_equal('SELECT * FROM projects', d.filtered[0].last['statement'])
54
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
55
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
56
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[3].last['statement'])
43
57
  end
44
58
 
45
59
  test 'parses prepended Marginalia comments' do
@@ -48,6 +62,7 @@ class Marginalia < Test::Unit::TestCase
48
62
  inputs = [
49
63
  { 'statement' => '/* this is just a comment */ SELECT COUNT(*) FROM "projects"' },
50
64
  { 'statement' => '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"' },
65
+ { 'statement' => '/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT COUNT(*) FROM "projects"' },
51
66
  { 'statement' => '/*application:sidekiq*/ SELECT COUNT(*) FROM "projects"',
52
67
  'application' => 'test-conflict' }
53
68
  ]
@@ -56,7 +71,7 @@ class Marginalia < Test::Unit::TestCase
56
71
  inputs.each { |input| d.feed(input) }
57
72
  end
58
73
 
59
- assert_equal(inputs[0].merge, d.filtered[0].last)
74
+ assert_equal(inputs[0], d.filtered[0].last)
60
75
  assert_equal(inputs[1].merge(
61
76
  {
62
77
  'application' => 'sidekiq',
@@ -68,9 +83,45 @@ class Marginalia < Test::Unit::TestCase
68
83
  d.filtered[1].last)
69
84
  assert_equal(inputs[2].merge(
70
85
  {
71
- 'statement_application' => 'sidekiq'
86
+ 'application' => 'web',
87
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
88
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
72
89
  }
73
90
  ),
74
91
  d.filtered[2].last)
92
+ assert_equal(inputs[3].merge(
93
+ {
94
+ 'statement_application' => 'sidekiq'
95
+ }
96
+ ),
97
+ d.filtered[3].last)
98
+
99
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[0].last['statement'])
100
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
101
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
102
+ end
103
+
104
+ test 'parses Marginalia comments with strip_comment disabled' do
105
+ d = create_driver(
106
+ <<~CONF
107
+ strip_comment false
108
+ key sql
109
+ CONF
110
+ )
111
+
112
+ sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/)
113
+ appended_sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/')
114
+
115
+ inputs = [
116
+ { 'sql' => sql },
117
+ { 'sql' => appended_sql }
118
+ ]
119
+
120
+ d.run(default_tag: @tag) do
121
+ inputs.each { |input| d.feed(input) }
122
+ end
123
+
124
+ assert_equal(sql, d.filtered[0].last['sql'])
125
+ assert_equal(appended_sql, d.filtered[1].last['sql'])
75
126
  end
76
127
  end
@@ -8,11 +8,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
8
8
  @tag = 'test.tag'
9
9
  end
10
10
 
11
- CONFIG = '
12
- <filter test.tag>
13
- @type postgresql_redactor
14
- </filter>
15
- '
11
+ CONFIG = ''
16
12
 
17
13
  def create_driver(conf = CONFIG)
18
14
  Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLRedactor).configure(conf)
@@ -23,7 +19,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
23
19
 
24
20
  inputs = [
25
21
  { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
26
- 'statement' => %(SELECT * FROM projects WHERE id = 1),
22
+ 'query' => %(SELECT * FROM projects WHERE id = 1),
27
23
  'duration_s' => 2.3571 }
28
24
  ]
29
25
 
@@ -31,20 +27,43 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
31
27
  inputs.each { |input| d.feed(input) }
32
28
  end
33
29
 
34
- assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
30
+ assert_equal(%w[duration_s fingerprint message sql], d.filtered[0].last.keys.sort)
35
31
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
36
32
  end
37
33
 
38
34
  test 'handles parse errors' do
39
35
  d = create_driver
40
36
 
41
- input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
37
+ input = { 'query' => 'create index something test (bla) include (bar)' }
42
38
 
43
39
  d.run(default_tag: @tag) do
44
40
  d.feed(input)
45
41
  end
46
42
 
47
- assert_equal(%w[pg_query_error statement], d.filtered[0].last.keys.sort)
48
- assert_equal(input['statement'], d.filtered[0].last['statement'])
43
+ assert_equal(%w[pg_query_error query], d.filtered[0].last.keys.sort)
44
+ assert_equal(input['query'], d.filtered[0].last['query'])
45
+ end
46
+
47
+ test 'uses configured input and output keys' do
48
+ d = create_driver(<<~CONF
49
+ input_key sql
50
+ output_key out_sql
51
+ CONF
52
+ )
53
+
54
+ inputs = [
55
+ {
56
+ 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
57
+ 'sql' => %(SELECT * FROM projects WHERE id = 1),
58
+ 'duration_s' => 2.3571
59
+ }
60
+ ]
61
+
62
+ d.run(default_tag: @tag) do
63
+ inputs.each { |input| d.feed(input) }
64
+ end
65
+
66
+ assert_equal(%w[duration_s fingerprint message out_sql], d.filtered[0].last.keys.sort)
67
+ assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['out_sql'])
49
68
  end
50
69
  end
@@ -32,18 +32,21 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase
32
32
 
33
33
  assert_equal(inputs[0].merge(
34
34
  {
35
- 'statement' => 'SELECT * FROM projects',
35
+ 'query' => 'SELECT * FROM projects',
36
36
  'duration_s' => 2.3571
37
37
  }
38
38
  ),
39
39
  d.filtered[0].last)
40
40
  assert_equal(inputs[1].merge(
41
41
  {
42
- 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
42
+ 'query' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
43
43
  'duration_s' => 1.873345
44
44
  }
45
45
  ),
46
46
  d.filtered[1].last)
47
+
48
+ assert_equal(%w[duration_s query], d.filtered[0].last.keys.sort)
49
+ assert_equal(%w[duration_s query], d.filtered[1].last.keys.sort)
47
50
  end
48
51
 
49
52
  test 'ignores messages not having to do with slow logs' do
@@ -55,5 +58,32 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase
55
58
  end
56
59
 
57
60
  assert_equal(input, d.filtered[0].last)
61
+ assert_equal(%w[message], d.filtered[0].last.keys.sort)
62
+ end
63
+
64
+ test 'outputs slow log entries to configured output key' do
65
+ d = create_driver(
66
+ <<~CONF
67
+ output_key my_key
68
+ CONF
69
+ )
70
+
71
+ inputs = [
72
+ { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects' }
73
+ ]
74
+
75
+ d.run(default_tag: @tag) do
76
+ inputs.each { |input| d.feed(input) }
77
+ end
78
+
79
+ assert_equal(inputs[0].merge(
80
+ {
81
+ 'my_key' => 'SELECT * FROM projects',
82
+ 'duration_s' => 2.3571
83
+ }
84
+ ),
85
+ d.filtered[0].last)
86
+
87
+ assert_equal(%w[duration_s my_key], d.filtered[0].last.keys.sort)
58
88
  end
59
89
  end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ end
9
+
10
+ CONFIG = %(
11
+ tag postgres.pg_stat_statements
12
+ host localhost
13
+ port 1234
14
+ dbname gitlab
15
+ sslmode require
16
+ username moo
17
+ password secret
18
+ interval 600
19
+ fingerprint_key fingerprint
20
+ )
21
+
22
+ def create_driver
23
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatStatementsInput).configure(CONFIG)
24
+ end
25
+
26
+ sub_test_case 'configuration' do
27
+ test 'basic configuration' do
28
+ d = create_driver
29
+
30
+ assert_equal 'postgres.pg_stat_statements', d.instance.tag
31
+ assert_equal 'localhost', d.instance.host
32
+ assert_equal 1234, d.instance.port
33
+ assert_equal 'gitlab', d.instance.dbname
34
+ assert_equal 'require', d.instance.sslmode
35
+ assert_equal 'moo', d.instance.username
36
+ assert_equal 'secret', d.instance.password
37
+ assert_equal 600, d.instance.interval
38
+ assert_equal 'fingerprint', d.instance.fingerprint_key
39
+ end
40
+ end
41
+
42
+ sub_test_case 'execution' do
43
+ test 'sql' do
44
+ d = create_driver
45
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
46
+
47
+ expected = {
48
+ 'fingerprint' => 'c071dee80d466e7d',
49
+ 'query' => 'SELECT * FROM users WHERE user_id = ?',
50
+ 'query_length' => 37,
51
+ 'queryid' => '1234'
52
+ }
53
+
54
+ assert_equal expected, record
55
+ end
56
+
57
+ test 'nil query' do
58
+ d = create_driver
59
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
60
+
61
+ expected = { 'query_length' => nil, 'queryid' => '1234' }
62
+ assert_equal expected, record
63
+ end
64
+
65
+ test 'ddl query' do
66
+ d = create_driver
67
+ ddl_sql = <<-SQL
68
+ CREATE TABLE accounts (
69
+ user_id serial PRIMARY KEY,
70
+ username VARCHAR(50) UNIQUE NOT NULL,
71
+ password VARCHAR(50) NOT NULL,
72
+ email VARCHAR(255) UNIQUE NOT NULL,
73
+ created_on TIMESTAMP NOT NULL,
74
+ last_login TIMESTAMP
75
+ )
76
+ SQL
77
+
78
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
79
+
80
+ expected = {
81
+ 'fingerprint' => 'fa9c9d26757c4f9b',
82
+ 'query' => ddl_sql,
83
+ 'query_length' => 287,
84
+ 'queryid' => '1234'
85
+ }
86
+ assert_equal expected, record
87
+ end
88
+
89
+ test 'set command' do
90
+ d = create_driver
91
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
92
+
93
+ expected = {
94
+ 'fingerprint' => '23f8d6eb1d3125c3',
95
+ 'query' => 'SET TIME ZONE $1',
96
+ 'query_length' => 23,
97
+ 'queryid' => '1234'
98
+ }
99
+
100
+ assert_equal expected, record
101
+ end
102
+
103
+ test 'unparseable sql' do
104
+ d = create_driver
105
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
106
+
107
+ expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
108
+ assert_equal expected, record
109
+ end
110
+ end
111
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-20 00:00:00.000000000 Z
11
+ date: 2021-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -30,20 +30,34 @@ dependencies:
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
33
47
  - !ruby/object:Gem::Dependency
34
48
  name: pg_query
35
49
  requirement: !ruby/object:Gem::Requirement
36
50
  requirements:
37
51
  - - "~>"
38
52
  - !ruby/object:Gem::Version
39
- version: '1.3'
53
+ version: '2.0'
40
54
  type: :runtime
41
55
  prerelease: false
42
56
  version_requirements: !ruby/object:Gem::Requirement
43
57
  requirements:
44
58
  - - "~>"
45
59
  - !ruby/object:Gem::Version
46
- version: '1.3'
60
+ version: '2.0'
47
61
  - !ruby/object:Gem::Dependency
48
62
  name: rake
49
63
  requirement: !ruby/object:Gem::Requirement
@@ -80,21 +94,27 @@ extensions: []
80
94
  extra_rdoc_files: []
81
95
  files:
82
96
  - ".gitlab-ci.yml"
97
+ - Dockerfile
83
98
  - Gemfile
84
- - Gemfile.lock
85
99
  - LICENSE
86
100
  - README.md
87
101
  - Rakefile
102
+ - docker-compose.yml
103
+ - example-fluentd.conf
88
104
  - fluent-plugin-postgresql-csvlog.gemspec
89
105
  - lib/fluent/plugin/filter_marginalia.rb
90
106
  - lib/fluent/plugin/filter_postgresql_redactor.rb
91
107
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
108
+ - lib/fluent/plugin/in_pg_stat_statements.rb
92
109
  - lib/fluent/plugin/parser_multiline_csv.rb
110
+ - sql/create_extension.sql
93
111
  - test/helper.rb
112
+ - test/plugin/itest_in_pg_stat_statements.rb
94
113
  - test/plugin/test_filter_marginalia.rb
95
114
  - test/plugin/test_filter_postgresql_redactor.rb
96
115
  - test/plugin/test_filter_postgresql_slowlog.rb
97
- homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
116
+ - test/plugin/test_in_pg_stat_statements.rb
117
+ homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
98
118
  licenses: []
99
119
  metadata: {}
100
120
  post_install_message:
@@ -118,6 +138,8 @@ specification_version: 4
118
138
  summary: fluentd plugins to work with PostgreSQL CSV logs
119
139
  test_files:
120
140
  - test/helper.rb
141
+ - test/plugin/itest_in_pg_stat_statements.rb
121
142
  - test/plugin/test_filter_marginalia.rb
122
143
  - test/plugin/test_filter_postgresql_redactor.rb
123
144
  - test/plugin/test_filter_postgresql_slowlog.rb
145
+ - test/plugin/test_in_pg_stat_statements.rb
data/Gemfile.lock DELETED
@@ -1,50 +0,0 @@
1
- PATH
2
- remote: .
3
- specs:
4
- fluent-plugin-postgresql-csvlog (0.0.2)
5
- fluentd (>= 1.0, < 2)
6
- pg_query (~> 1.3)
7
-
8
- GEM
9
- remote: https://rubygems.org/
10
- specs:
11
- concurrent-ruby (1.1.8)
12
- cool.io (1.7.0)
13
- fluentd (1.12.0)
14
- bundler
15
- cool.io (>= 1.4.5, < 2.0.0)
16
- http_parser.rb (>= 0.5.1, < 0.7.0)
17
- msgpack (>= 1.3.1, < 2.0.0)
18
- serverengine (>= 2.2.2, < 3.0.0)
19
- sigdump (~> 0.2.2)
20
- strptime (>= 0.2.2, < 1.0.0)
21
- tzinfo (>= 1.0, < 3.0)
22
- tzinfo-data (~> 1.0)
23
- yajl-ruby (~> 1.0)
24
- http_parser.rb (0.6.0)
25
- msgpack (1.4.2)
26
- pg_query (1.3.0)
27
- power_assert (2.0.0)
28
- rake (13.0.3)
29
- serverengine (2.2.2)
30
- sigdump (~> 0.2.2)
31
- sigdump (0.2.4)
32
- strptime (0.2.5)
33
- test-unit (3.4.0)
34
- power_assert
35
- tzinfo (2.0.4)
36
- concurrent-ruby (~> 1.0)
37
- tzinfo-data (1.2021.1)
38
- tzinfo (>= 1.0.0)
39
- yajl-ruby (1.4.1)
40
-
41
- PLATFORMS
42
- ruby
43
-
44
- DEPENDENCIES
45
- fluent-plugin-postgresql-csvlog!
46
- rake
47
- test-unit (~> 3.2)
48
-
49
- BUNDLED WITH
50
- 2.1.4