fluent-plugin-postgresql-csvlog 0.0.1 → 0.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 9fd4d916e4d8cc9ebb39d62e847de1c2dca1ce0175d738ca55430a962b3283ab
-   data.tar.gz: b3e6baacf5bd683c39eca587c55ea0326ffe18a55d3b4f8b23ba435642641b8c
+   metadata.gz: bb322f50f848a7b196962dae916a75f30a77f1461167bb314494ae203f8ef9d6
+   data.tar.gz: 077266e6c7f3589813d0c4db8ddccfde76f7d6ecd0b6d6388b67a4e6328ad858
  SHA512:
-   metadata.gz: 2e783b77515eb3ec55b4684e11d3014fdea761b057d22fb3935eafd7064d9fcb7038b8f78f7fa7e9aa238d339c8a5c3d1cac358a430c207f6ba9cd19bf8ae55a
-   data.tar.gz: 77dace38e2de7a851547ee25502943f0d610cceefa8ee4d1194e084301852bf3b1e8fd93e79144f6c81865d7aadfe3c3705a210f3ca4dee2029e2daa5fbdf989
+   metadata.gz: 9b130aa84b1285a62466b3bf68ff6d02acc7e95cd53cc91ee7e34ec81ee4440e06358738790f86f8d436cb8a72ad232f12ac2c0f9d8aca16fd5c6ba19bbdfe49
+   data.tar.gz: e9480110be0cadfad86c5fcc554eb5d1d7264559714e4fd88ea396ae9bb913027b15782c70cd2f4c06e1f4cc5a5bbafe55632ca8eba02ce57e610d98f8440c09
data/.gitlab-ci.yml ADDED
@@ -0,0 +1,29 @@
+ image: "ruby:2.7"
+
+ test:
+   before_script:
+     - bundle config set path vendor
+     - bundle install --jobs $(nproc)
+   script:
+     - bundle exec rake test
+   cache:
+     paths:
+       - vendor/ruby
+
+ # integration tests
+ itest:
+   services:
+     - name: postgres:12
+       alias: postgres
+       command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
+   variables:
+     POSTGRES_USER: testuser
+     POSTGRES_PASSWORD: testpass
+   before_script:
+     - bundle config set path vendor
+     - bundle install --jobs $(nproc)
+   script:
+     - bundle exec rake itest
+   cache:
+     paths:
+       - vendor/ruby
data/README.md CHANGED
@@ -6,6 +6,7 @@ parse PostgreSQL CSV log files and extract slow log information:
  - `MultilineCSVParser`: Parses CSV files that span multiple lines
  - `PostgreSQLSlowLog`: Extracts slow log entries into `duration_s` and `statement` fields
  - `PostgreSQLRedactor`: Normalizes the SQL query and redacts sensitive information
+ - `Marginalia`: Parses [Marginalia comments](https://github.com/basecamp/marginalia) into key-value pairs and stores them

  ## Installation

@@ -28,7 +29,6 @@ The configuration below shows how you might use these filters to
  ingest and parse PostgreSQL CSV logs:

  ```conf
- ## PostgreSQL csvlog (enabled with
  <source>
    @type tail
    tag postgres.postgres_csv
@@ -47,10 +47,20 @@ ingest and parse PostgreSQL CSV logs:

  <filter postgres.postgres_csv>
    @type postgresql_slowlog
+   output_key query
  </filter>

  <filter postgres.postgres_csv>
    @type postgresql_redactor
+   input_key query
+   output_key sql
+   fingerprint_key fingerprint
+ </filter>
+
+ <filter postgres.postgres_csv>
+   @type marginalia
+   key sql
+   strip_comment true
  </filter>

  # Output resulting JSON file to a directory in /tmp
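A record that has passed through the three filters configured above carries the duration extracted by `postgresql_slowlog`, the normalized SQL and fingerprint added by `postgresql_redactor`, and the key-value pairs parsed by `marginalia`. A minimal sketch of such a record, with illustrative placeholder values rather than real output:

```ruby
# Illustrative only: the shape of a record after the slowlog, redactor, and
# marginalia filters. Field names come from the plugins; values are placeholders.
record = {
  'duration_s' => 2.3571,
  'sql' => 'SELECT * FROM projects WHERE id = $1',
  'fingerprint' => '<hex fingerprint from pg_query>',   # hypothetical value
  'application' => 'sidekiq',                           # Marginalia key/value pairs
  'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e'
}
```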
data/Rakefile ADDED
@@ -0,0 +1,18 @@
+ require 'bundler'
+ Bundler::GemHelper.install_tasks
+
+ require 'rake/testtask'
+
+ Rake::TestTask.new(:test) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/test_*.rb']
+   test.verbose = true
+ end
+
+ Rake::TestTask.new(:itest) do |test|
+   test.libs << 'lib' << 'test'
+   test.test_files = FileList['test/**/itest_*.rb']
+   test.verbose = true
+ end
+
+ task :default => [:build]
data/fluent-plugin-postgresql-csvlog.gemspec CHANGED
@@ -2,10 +2,10 @@ $:.push File.expand_path('lib', __dir__)

  Gem::Specification.new do |s|
    s.name = 'fluent-plugin-postgresql-csvlog'
-   s.version = '0.0.1'
+   s.version = '0.3.0'
    s.authors = ['stanhu']
    s.email = ['stanhu@gmail.com']
-   s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
+   s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
    s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
    s.description = 'fluentd plugins to work with PostgreSQL CSV logs'

@@ -15,7 +15,8 @@ Gem::Specification.new do |s|
    s.require_paths = ['lib']

    s.add_dependency 'fluentd', ['>= 1.0', '< 2']
-   s.add_dependency 'pg_query', '~> 1.3'
+   s.add_dependency 'pg', '~> 1.1'
+   s.add_dependency 'pg_query', '~> 2.0'

    s.add_development_dependency 'rake'
    s.add_development_dependency 'test-unit', '~> 3.2'
data/lib/fluent/plugin/filter_marginalia.rb ADDED
@@ -0,0 +1,95 @@
+ # frozen_string_literal: true
+
+ require 'fluent/plugin/filter'
+
+ module Fluent
+   module Plugin
+     # Filters SQL statements for Marginalia comments.
+     #
+     # Examples:
+     # SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/
+     # /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"
+     #
+     class Marginalia < Filter
+       Fluent::Plugin.register_filter('marginalia', self)
+
+       desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
+       config_param :key, :string, default: 'sql'
+
+       desc 'Whether to strip the comment from the record specified by key'
+       config_param :strip_comment, :bool, default: true
+
+       MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
+       MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
+
+       def filter(_tag, _time, record)
+         parse_comments(record)
+
+         record
+       end
+
+       private
+
+       def parse_comments(record)
+         sql = record[@key]
+
+         return unless sql
+
+         comment_match = match_marginalia_comment(sql)
+
+         return unless comment_match
+
+         entries = extract_entries(comment_match['comment'])
+         parse_entries(entries, record)
+
+         record[@key] = comment_match['sql'].strip if @strip_comment
+       end
+
+       def match_marginalia_comment(sql)
+         matched = MARGINALIA_PREPENDED_REGEXP.match(sql)
+
+         return matched if matched
+
+         MARGINALIA_APPENDED_REGEXP.match(sql)
+       end
+
+       def extract_entries(comment)
+         comment = scrub_comment(comment)
+
+         return [] unless comment
+
+         comment.split(',')
+       end
+
+       def scrub_comment(comment)
+         return unless comment
+
+         comment.strip!
+         comment.gsub!(%r{^/\*}, '')
+         comment.gsub!(%r{\*/$}, '')
+       end
+
+       def parse_entries(entries, record)
+         entries.each do |component|
+           data = component.split(':', 2)
+
+           break unless data.length == 2
+
+           stored_key = store_key(record, data[0])
+           record[stored_key] = data[1]
+         end
+       end
+
+       def store_key(record, component_key)
+         # In case the Marginalia key conflicts with an existing record key
+         # (e.g. `correlation_id`), we store the value under a prefixed key
+         # (e.g. `sql_correlation_id`) instead.
+         if record.key?(component_key)
+           "#{@key}_#{component_key}"
+         else
+           component_key
+         end
+       end
+     end
+   end
+ end
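The comment handling above can be reproduced outside of fluentd. The following standalone sketch reuses the same two regular expressions to pull key-value pairs out of an appended Marginalia comment; the resulting hash is shown in the trailing comment:

```ruby
# Illustrative only: the regexp-based extraction the marginalia filter performs,
# applied to a single SQL string outside of fluentd.
MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze

sql = 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,job_class:Geo::MetricsUpdateWorker*/'

match = MARGINALIA_PREPENDED_REGEXP.match(sql) || MARGINALIA_APPENDED_REGEXP.match(sql)
comment = match['comment'].sub(%r{^/\*}, '').sub(%r{\*/$}, '')
pairs = comment.split(',').map { |entry| entry.split(':', 2) }.to_h
# pairs        => {"application"=>"sidekiq", "job_class"=>"Geo::MetricsUpdateWorker"}
# match['sql'] => 'SELECT COUNT(*) FROM "projects" ' (stripped when strip_comment is true)
```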
data/lib/fluent/plugin/filter_postgresql_redactor.rb CHANGED
@@ -1,3 +1,5 @@
+ # frozen_string_literal: true
+
  require 'fluent/plugin/filter'
  require 'pg_query'

@@ -5,24 +7,28 @@ module Fluent::Plugin
    class PostgreSQLRedactor < Filter
      Fluent::Plugin.register_filter('postgresql_redactor', self)

-     def configure(conf)
-       super
-     end
+     desc 'Input field to parse for SQL queries'
+     config_param :input_key, :string, default: 'query'
+
+     desc 'Output field to store SQL queries'
+     config_param :output_key, :string, default: 'sql'
+
+     desc 'Name of field to store SQL query fingerprint'
+     config_param :fingerprint_key, :string, default: 'fingerprint'

      def filter(_tag, _time, record)
-       statement = record['statement']
+       statement = record[@input_key]

        return record unless statement

        normalized = PgQuery.normalize(statement)
-       record.delete('statement')
-       record['sql'] = normalized
-       record.delete('message')
+       record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
+
+       record.delete(@input_key)
+       record[@output_key] = normalized

        record
      rescue PgQuery::ParseError
-       # pg_query currently only supports PostgresQL 10:
-       # https://github.com/lfittl/pg_query/issues/184
        record['pg_query_error'] = true
        record
      end
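The redactor leans on two pg_query calls: `PgQuery.normalize` replaces literals with `$n` placeholders, and `PgQuery.parse(...).fingerprint` produces a stable identifier for the query shape. A minimal sketch of those calls, assuming the `pg_query ~> 2.0` dependency declared in the gemspec:

```ruby
# Minimal sketch of the pg_query 2.x calls used by the redactor filter above.
require 'pg_query'

normalized = PgQuery.normalize('SELECT * FROM projects WHERE id = 1')
# => "SELECT * FROM projects WHERE id = $1"

fingerprint = PgQuery.parse(normalized).fingerprint
# => a stable hex string identifying the query shape (exact value depends on the query)

puts normalized
puts fingerprint
```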
data/lib/fluent/plugin/filter_postgresql_slowlog.rb CHANGED
@@ -12,6 +12,9 @@ module Fluent
      class PostgreSQLSlowLog < Filter
        Fluent::Plugin.register_filter('postgresql_slowlog', self)

+       desc 'Field to output SQL queries'
+       config_param :output_key, :string, default: 'query'
+
        SLOWLOG_REGEXP = /^duration: (\d+(?:\.\d+)?) ms .*?:\s*(.*)/m.freeze

        def filter(_tag, _time, record)
@@ -20,7 +23,8 @@ module Fluent
          # rubocop:disable Style/PerlBackrefs
          if record['message'] =~ SLOWLOG_REGEXP
            record['duration_s'] = $1.to_f / 1000.0
-           record['statement'] = $2
+           record[@output_key] = $2
+           record.delete('message')
          end
          # rubocop:enable Style/PerlBackrefs

data/lib/fluent/plugin/input_pg_stat_statements.rb ADDED
@@ -0,0 +1,136 @@
+ # frozen_string_literal: true
+
+ require 'fluent/input'
+ require 'pg'
+ require 'pg_query'
+
+ module Fluent
+   # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
+   # for queryid to query mappings. These are then normalized for security purposes,
+   # fingerprinted, and emitted as records with the following format:
+   # {
+   #   'fingerprint' => '8a6e9896bd9048a2',
+   #   'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
+   #   'query_length' => 58,
+   #   'queryid' => 3239318621761098074
+   # }
+   class PgStatStatementsInput < Input
+     Fluent::Plugin.register_input('pg_stat_statements', self)
+
+     desc 'PostgreSQL host'
+     config_param :host, :string
+
+     desc 'RDBMS port (default: 5432)'
+     config_param :port, :integer, default: 5432
+
+     desc 'login user name'
+     config_param :username, :string, default: nil
+
+     desc 'postgres db'
+     config_param :dbname, :string, default: nil
+
+     desc 'login password'
+     config_param :password, :string, default: nil, secret: true
+
+     # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
+     # for options
+     desc 'postgres sslmode'
+     config_param :sslmode, :string, default: 'prefer'
+
+     desc 'tag'
+     config_param :tag, :string, default: nil
+
+     desc 'interval in seconds to run query'
+     config_param :interval, :time, default: 300
+
+     desc 'Name of field to store SQL query fingerprint'
+     config_param :fingerprint_key, :string, default: 'fingerprint'
+
+     def start
+       @stop_flag = false
+       @thread = Thread.new(&method(:thread_main))
+     end
+
+     def shutdown
+       @stop_flag = true
+
+       # Interrupt thread and wait for it to finish
+       Thread.new { @thread.run } if @thread
+       @thread.join
+     end
+
+     def thread_main
+       until @stop_flag
+         sleep @interval
+         break if @stop_flag
+
+         begin
+           with_connection do |conn|
+             emit_statements_to_stream(conn)
+           end
+         rescue StandardError => e
+           log.error 'unexpected error', error: e.message, error_class: e.class
+           log.error_backtrace e.backtrace
+         end
+       end
+     end
+
+     # Returns a fluentd record for a query row
+     def record_for_row(row)
+       query = row['query']
+
+       # We record the query_length as it will help in understanding whether unparseable
+       # queries are truncated.
+       record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
+
+       return record unless query
+
+       normalized = PgQuery.normalize(query)
+       record['query'] = normalized
+
+       record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
+
+       record
+     rescue PgQuery::ParseError
+       record['query_unparseable'] = true
+
+       record
+     end
+
+     private
+
+     # Query the database and emit statements to fluentd router
+     def emit_statements_to_stream(conn)
+       me = MultiEventStream.new
+
+       now = Engine.now
+       conn.exec('SELECT queryid, query FROM pg_stat_statements').each do |row|
+         record = record_for_row(row)
+         me.add(now, record)
+       end
+
+       @router.emit_stream(@tag, me)
+     end
+
+     # Since this query is very infrequent, and it may be communicating directly
+     # with postgres without pgbouncer, don't use a persistent connection and
+     # ensure that it is properly closed
+     def with_connection(&block)
+       conn = PG.connect(
+         host: @host,
+         dbname: @dbname,
+         sslmode: @sslmode,
+         user: @username,
+         password: @password
+       )
+       conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
+
+       begin
+         block.call(conn)
+       ensure
+         # Always close the connection
+         conn.finish
+       end
+     end
+   end
+ end
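Outside of fluentd, the polling boils down to a plain `pg` query against `pg_stat_statements`, mirroring `with_connection` and `emit_statements_to_stream` above. A standalone sketch; the connection parameters here are placeholders:

```ruby
# Illustrative only: the pg calls the input plugin relies on, run standalone
# against a database that has the pg_stat_statements extension enabled.
require 'pg'

conn = PG.connect(host: 'localhost', dbname: 'postgres', user: 'testuser', password: 'testpass')
conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn)

begin
  conn.exec('SELECT queryid, query FROM pg_stat_statements LIMIT 5').each do |row|
    puts "#{row['queryid']}: #{row['query']}"
  end
ensure
  # Always close the short-lived connection, as the plugin does
  conn.finish
end
```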
data/test/helper.rb CHANGED
@@ -7,9 +7,13 @@ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
  $LOAD_PATH.unshift(__dir__)
  require 'fluent/test'
  require 'fluent/test/driver/filter'
+ require 'fluent/test/driver/input'
  require 'fluent/test/helpers'

  Test::Unit::TestCase.include(Fluent::Test::Helpers)
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)

  require 'fluent/plugin/filter_postgresql_slowlog'
  require 'fluent/plugin/filter_postgresql_redactor'
+ require 'fluent/plugin/filter_marginalia'
+ require 'fluent/plugin/input_pg_stat_statements'
data/test/plugin/itest_input_pg_stat_statements.rb ADDED
@@ -0,0 +1,119 @@
+ # frozen_string_literal: true
+
+ require_relative '../helper'
+
+ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
+   # These items are configured in .gitlab-ci.yml on the postgres service
+   HOST = 'postgres'
+   USERNAME = 'testuser'
+   PASSWORD = 'testpass'
+
+   def setup
+     Fluent::Test.setup
+
+     @conn = PG.connect(
+       host: HOST,
+       user: USERNAME,
+       password: PASSWORD
+     )
+
+     try_setup_extension
+     create_known_statement
+   end
+
+   def teardown
+     @conn&.finish
+   end
+
+   # Setup pg_stat_statements extension
+   def try_setup_extension
+     @conn.exec('CREATE EXTENSION pg_stat_statements')
+   rescue PG::DuplicateObject
+   end
+
+   # This statement gives us something to look for in the emitted stream
+   def create_known_statement
+     @conn.exec('SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT 1')
+   end
+
+   VALID_CONFIG = %(
+     tag postgres.pg_stat_statements
+     host #{HOST}
+     username #{USERNAME}
+     password #{PASSWORD}
+     interval 1
+   )
+
+   INVALID_CONFIG = %(
+     host 'invalid_host.dne'
+     port 1234
+     username #{USERNAME}
+     password #{PASSWORD}
+     interval 1
+   )
+
+   def create_driver(config)
+     Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(config)
+   end
+
+   sub_test_case 'configuration' do
+     test 'connects' do
+       d = create_driver(VALID_CONFIG)
+
+       emits = []
+       # wait 50 * 0.05 s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
+       d.run(num_waits = 50) do
+         emits = d.emits
+       end
+
+       assert_false emits.empty?
+     end
+
+     # Why do we have this test? If postgres is still starting up, we don't want to cause
+     # the fluentd configuration to fail. We would rather retry until we get a connection.
+     test 'connects for an invalid config' do
+       d = create_driver(INVALID_CONFIG)
+
+       emits = []
+       # wait 50 * 0.05 s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
+       d.run(num_waits = 50) do
+         emits = d.emits
+       end
+
+       assert_true emits.empty?
+     end
+   end
+
+   sub_test_case 'execution' do
+     test 'connects' do
+       d = create_driver(VALID_CONFIG)
+
+       emits = []
+       # wait 50 * 0.05 s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
+       d.run(num_waits = 50) do
+         emits = d.emits
+       end
+
+       expected_record = {
+         'fingerprint' => '8a6e9896bd9048a2',
+         'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
+         'query_length' => 58,
+         'queryid' => 3_239_318_621_761_098_074
+       }
+       known_statement_event = emits.find do |event|
+         record = event[2]
+         record['query'] == expected_record['query']
+       end
+
+       assert_false known_statement_event.nil?
+
+       tag = known_statement_event[0]
+       record = known_statement_event[2]
+
+       assert_equal 'postgres.pg_stat_statements', tag
+       assert_equal expected_record['fingerprint'], record['fingerprint']
+       assert_equal expected_record['query_length'], record['query_length']
+       assert_true record.include? 'queryid'
+     end
+   end
+ end
data/test/plugin/test_filter_marginalia.rb ADDED
@@ -0,0 +1,127 @@
+ # frozen_string_literal: true
+
+ require_relative '../helper'
+
+ class Marginalia < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+     @tag = 'test.tag'
+   end
+
+   CONFIG = '
+     key statement
+   '
+
+   def create_driver(conf = CONFIG)
+     Fluent::Test::Driver::Filter.new(Fluent::Plugin::Marginalia).configure(conf)
+   end
+
+   test 'parses appended Marginalia comments' do
+     d = create_driver
+
+     inputs = [
+       { 'statement' => 'SELECT * FROM projects' },
+       { 'statement' => 'SELECT COUNT(*) FROM "projects" /* this is just a comment */' },
+       { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/' },
+       { 'statement' => 'SELECT COUNT(*) FROM "projects" /*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/' }
+     ]
+
+     d.run(default_tag: @tag) do
+       inputs.each { |input| d.feed(input) }
+     end
+
+     assert_equal(inputs[0], d.filtered[0].last)
+     assert_equal(inputs[1], d.filtered[1].last)
+     assert_equal(inputs[2].merge(
+       {
+         'application' => 'sidekiq',
+         'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
+         'jid' => '52a1c8a9e4c555ea573f20f0',
+         'job_class' => 'Geo::MetricsUpdateWorker'
+       }
+     ),
+     d.filtered[2].last)
+     assert_equal(inputs[3].merge(
+       {
+         'application' => 'web',
+         'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
+         'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
+       }
+     ),
+     d.filtered[3].last)
+
+     assert_equal('SELECT * FROM projects', d.filtered[0].last['statement'])
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[3].last['statement'])
+   end
+
+   test 'parses prepended Marginalia comments' do
+     d = create_driver
+
+     inputs = [
+       { 'statement' => '/* this is just a comment */ SELECT COUNT(*) FROM "projects"' },
+       { 'statement' => '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"' },
+       { 'statement' => '/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT COUNT(*) FROM "projects"' },
+       { 'statement' => '/*application:sidekiq*/ SELECT COUNT(*) FROM "projects"',
+         'application' => 'test-conflict' }
+     ]
+
+     d.run(default_tag: @tag) do
+       inputs.each { |input| d.feed(input) }
+     end
+
+     assert_equal(inputs[0], d.filtered[0].last)
+     assert_equal(inputs[1].merge(
+       {
+         'application' => 'sidekiq',
+         'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
+         'jid' => '52a1c8a9e4c555ea573f20f0',
+         'job_class' => 'Geo::MetricsUpdateWorker'
+       }
+     ),
+     d.filtered[1].last)
+     assert_equal(inputs[2].merge(
+       {
+         'application' => 'web',
+         'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
+         'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name'
+       }
+     ),
+     d.filtered[2].last)
+     assert_equal(inputs[3].merge(
+       {
+         'statement_application' => 'sidekiq'
+       }
+     ),
+     d.filtered[3].last)
+
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[0].last['statement'])
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
+     assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
+   end
+
+   test 'parses Marginalia comments with strip_comment disabled' do
+     d = create_driver(
+       <<~CONF
+         strip_comment false
+         key sql
+       CONF
+     )
+
+     sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/)
+     appended_sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/')
+
+     inputs = [
+       { 'sql' => sql },
+       { 'sql' => appended_sql }
+     ]
+
+     d.run(default_tag: @tag) do
+       inputs.each { |input| d.feed(input) }
+     end
+
+     assert_equal(sql, d.filtered[0].last['sql'])
+     assert_equal(appended_sql, d.filtered[1].last['sql'])
+   end
+ end
data/test/plugin/test_filter_postgresql_redactor.rb CHANGED
@@ -8,11 +8,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
    @tag = 'test.tag'
  end

- CONFIG = '
-   <filter test.tag>
-     @type postgresql_redactor
-   </filter>
- '
+ CONFIG = ''

  def create_driver(conf = CONFIG)
    Fluent::Test::Driver::Filter.new(Fluent::Plugin::PostgreSQLRedactor).configure(conf)
@@ -23,7 +19,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase

    inputs = [
      { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
-       'statement' => %(SELECT * FROM projects WHERE id = 1),
+       'query' => %(SELECT * FROM projects WHERE id = 1),
        'duration_s' => 2.3571 }
    ]

@@ -31,20 +27,43 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
      inputs.each { |input| d.feed(input) }
    end

-   assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
+   assert_equal(%w[duration_s fingerprint message sql], d.filtered[0].last.keys.sort)
    assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
  end

  test 'handles parse errors' do
    d = create_driver

-   input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
+   input = { 'query' => 'create index something test (bla) include (bar)' }

    d.run(default_tag: @tag) do
      d.feed(input)
    end

-   assert_equal(%w[pg_query_error statement], d.filtered[0].last.keys.sort)
-   assert_equal(input['statement'], d.filtered[0].last['statement'])
+   assert_equal(%w[pg_query_error query], d.filtered[0].last.keys.sort)
+   assert_equal(input['query'], d.filtered[0].last['query'])
+ end
+
+ test 'uses configured input and output keys' do
+   d = create_driver(<<~CONF
+     input_key sql
+     output_key out_sql
+   CONF
+   )
+
+   inputs = [
+     {
+       'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects WHERE id = 1',
+       'sql' => %(SELECT * FROM projects WHERE id = 1),
+       'duration_s' => 2.3571
+     }
+   ]
+
+   d.run(default_tag: @tag) do
+     inputs.each { |input| d.feed(input) }
+   end
+
+   assert_equal(%w[duration_s fingerprint message out_sql], d.filtered[0].last.keys.sort)
+   assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['out_sql'])
  end
end
data/test/plugin/test_filter_postgresql_slowlog.rb CHANGED
@@ -32,18 +32,21 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase

    assert_equal(inputs[0].merge(
      {
-       'statement' => 'SELECT * FROM projects',
+       'query' => 'SELECT * FROM projects',
        'duration_s' => 2.3571
      }
    ),
    d.filtered[0].last)
    assert_equal(inputs[1].merge(
      {
-       'statement' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
+       'query' => 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/',
        'duration_s' => 1.873345
      }
    ),
    d.filtered[1].last)
+
+   assert_equal(%w[duration_s query], d.filtered[0].last.keys.sort)
+   assert_equal(%w[duration_s query], d.filtered[1].last.keys.sort)
  end

  test 'ignores messages not having to do with slow logs' do
@@ -55,5 +58,32 @@ class PostgreSQLSlowLogTest < Test::Unit::TestCase
    end

    assert_equal(input, d.filtered[0].last)
+   assert_equal(%w[message], d.filtered[0].last.keys.sort)
+ end
+
+ test 'outputs slow log entries to configured output key' do
+   d = create_driver(
+     <<~CONF
+       output_key my_key
+     CONF
+   )
+
+   inputs = [
+     { 'message' => 'duration: 2357.1 ms execute <unnamed>: SELECT * FROM projects' }
+   ]
+
+   d.run(default_tag: @tag) do
+     inputs.each { |input| d.feed(input) }
+   end
+
+   assert_equal(inputs[0].merge(
+     {
+       'my_key' => 'SELECT * FROM projects',
+       'duration_s' => 2.3571
+     }
+   ),
+   d.filtered[0].last)
+
+   assert_equal(%w[duration_s my_key], d.filtered[0].last.keys.sort)
  end
end
data/test/plugin/test_input_pg_stat_statements.rb ADDED
@@ -0,0 +1,111 @@
+ # frozen_string_literal: true
+
+ require_relative '../helper'
+
+ class PgStatStatementsInputTest < Test::Unit::TestCase
+   def setup
+     Fluent::Test.setup
+   end
+
+   CONFIG = %(
+     tag postgres.pg_stat_statements
+     host localhost
+     port 1234
+     dbname gitlab
+     sslmode require
+     username moo
+     password secret
+     interval 600
+     fingerprint_key fingerprint
+   )
+
+   def create_driver
+     Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(CONFIG)
+   end
+
+   sub_test_case 'configuration' do
+     test 'basic configuration' do
+       d = create_driver
+
+       assert_equal 'postgres.pg_stat_statements', d.instance.tag
+       assert_equal 'localhost', d.instance.host
+       assert_equal 1234, d.instance.port
+       assert_equal 'gitlab', d.instance.dbname
+       assert_equal 'require', d.instance.sslmode
+       assert_equal 'moo', d.instance.username
+       assert_equal 'secret', d.instance.password
+       assert_equal 600, d.instance.interval
+       assert_equal 'fingerprint', d.instance.fingerprint_key
+     end
+   end
+
+   sub_test_case 'execution' do
+     test 'sql' do
+       d = create_driver
+       record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
+
+       expected = {
+         'fingerprint' => 'c071dee80d466e7d',
+         'query' => 'SELECT * FROM users WHERE user_id = ?',
+         'query_length' => 37,
+         'queryid' => '1234'
+       }
+
+       assert_equal expected, record
+     end
+
+     test 'nil query' do
+       d = create_driver
+       record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
+
+       expected = { 'query_length' => nil, 'queryid' => '1234' }
+       assert_equal expected, record
+     end
+
+     test 'ddl query' do
+       d = create_driver
+       ddl_sql = <<-SQL
+         CREATE TABLE accounts (
+           user_id serial PRIMARY KEY,
+           username VARCHAR(50) UNIQUE NOT NULL,
+           password VARCHAR(50) NOT NULL,
+           email VARCHAR(255) UNIQUE NOT NULL,
+           created_on TIMESTAMP NOT NULL,
+           last_login TIMESTAMP
+         )
+       SQL
+
+       record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
+
+       expected = {
+         'fingerprint' => 'fa9c9d26757c4f9b',
+         'query' => ddl_sql,
+         'query_length' => 287,
+         'queryid' => '1234'
+       }
+       assert_equal expected, record
+     end
+
+     test 'set command' do
+       d = create_driver
+       record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
+
+       expected = {
+         'fingerprint' => '23f8d6eb1d3125c3',
+         'query' => 'SET TIME ZONE $1',
+         'query_length' => 23,
+         'queryid' => '1234'
+       }
+
+       assert_equal expected, record
+     end
+
+     test 'unparseable sql' do
+       d = create_driver
+       record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
+
+       expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
+       assert_equal expected, record
+     end
+   end
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: fluent-plugin-postgresql-csvlog
  version: !ruby/object:Gem::Version
-   version: 0.0.1
+   version: 0.3.0
  platform: ruby
  authors:
  - stanhu
  autorequire:
  bindir: bin
  cert_chain: []
- date: 2021-02-10 00:00:00.000000000 Z
+ date: 2021-06-24 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: fluentd
@@ -30,20 +30,34 @@ dependencies:
    - - "<"
      - !ruby/object:Gem::Version
        version: '2'
+ - !ruby/object:Gem::Dependency
+   name: pg
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.1'
+   type: :runtime
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.1'
  - !ruby/object:Gem::Dependency
    name: pg_query
    requirement: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '1.3'
+         version: '2.0'
    type: :runtime
    prerelease: false
    version_requirements: !ruby/object:Gem::Requirement
      requirements:
      - - "~>"
        - !ruby/object:Gem::Version
-         version: '1.3'
+         version: '2.0'
  - !ruby/object:Gem::Dependency
    name: rake
    requirement: !ruby/object:Gem::Requirement
@@ -79,18 +93,24 @@ executables: []
  extensions: []
  extra_rdoc_files: []
  files:
+ - ".gitlab-ci.yml"
  - Gemfile
- - Gemfile.lock
  - LICENSE
  - README.md
+ - Rakefile
  - fluent-plugin-postgresql-csvlog.gemspec
+ - lib/fluent/plugin/filter_marginalia.rb
  - lib/fluent/plugin/filter_postgresql_redactor.rb
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
+ - lib/fluent/plugin/input_pg_stat_statements.rb
  - lib/fluent/plugin/parser_multiline_csv.rb
  - test/helper.rb
+ - test/plugin/itest_input_pg_stat_statements.rb
+ - test/plugin/test_filter_marginalia.rb
  - test/plugin/test_filter_postgresql_redactor.rb
  - test/plugin/test_filter_postgresql_slowlog.rb
- homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
+ - test/plugin/test_input_pg_stat_statements.rb
+ homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
  licenses: []
  metadata: {}
  post_install_message:
@@ -112,4 +132,10 @@ rubygems_version: 3.1.4
  signing_key:
  specification_version: 4
  summary: fluentd plugins to work with PostgreSQL CSV logs
- test_files: []
+ test_files:
+ - test/helper.rb
+ - test/plugin/itest_input_pg_stat_statements.rb
+ - test/plugin/test_filter_marginalia.rb
+ - test/plugin/test_filter_postgresql_redactor.rb
+ - test/plugin/test_filter_postgresql_slowlog.rb
+ - test/plugin/test_input_pg_stat_statements.rb
data/Gemfile.lock DELETED
@@ -1,50 +0,0 @@
- PATH
-   remote: .
-   specs:
-     fluent-plugin-postgresql-csvlog (0.0.1)
-       fluentd (>= 1.0, < 2)
-       pg_query (~> 1.3)
-
- GEM
-   remote: https://rubygems.org/
-   specs:
-     concurrent-ruby (1.1.8)
-     cool.io (1.7.0)
-     fluentd (1.12.0)
-       bundler
-       cool.io (>= 1.4.5, < 2.0.0)
-       http_parser.rb (>= 0.5.1, < 0.7.0)
-       msgpack (>= 1.3.1, < 2.0.0)
-       serverengine (>= 2.2.2, < 3.0.0)
-       sigdump (~> 0.2.2)
-       strptime (>= 0.2.2, < 1.0.0)
-       tzinfo (>= 1.0, < 3.0)
-       tzinfo-data (~> 1.0)
-       yajl-ruby (~> 1.0)
-     http_parser.rb (0.6.0)
-     msgpack (1.4.2)
-     pg_query (1.3.0)
-     power_assert (2.0.0)
-     rake (13.0.3)
-     serverengine (2.2.2)
-       sigdump (~> 0.2.2)
-     sigdump (0.2.4)
-     strptime (0.2.5)
-     test-unit (3.4.0)
-       power_assert
-     tzinfo (2.0.4)
-       concurrent-ruby (~> 1.0)
-     tzinfo-data (1.2021.1)
-       tzinfo (>= 1.0.0)
-     yajl-ruby (1.4.1)
-
- PLATFORMS
-   ruby
-
- DEPENDENCIES
-   fluent-plugin-postgresql-csvlog!
-   rake
-   test-unit (~> 3.2)
-
- BUNDLED WITH
-    2.1.4