fluent-plugin-postgresql-csvlog 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 002b7169e3f3ac6493eb09dca5c2e3820a944f38c64a0b45641f3cdbd1717ddf
4
- data.tar.gz: eac3646a2404665924c4fa30114b91ecce49dbadef3919d89eab458450568328
3
+ metadata.gz: 3f63769c3a4c78fc3db074b812e6f75e77c46b355563b403c3de620c49935392
4
+ data.tar.gz: 2fd8d1acd1fac5ce24b72ed4d488fc5f80f0a8752084784cf16f419389f34499
5
5
  SHA512:
6
- metadata.gz: 354478f3573f0934dcee72305069aa8f25333087d1667fc4e127963b8f8f955b66a02462925e4672f27f80e43b772640a68a4760266f461704bd9486ad66a3ef
7
- data.tar.gz: f6d89c2db73d337b1aa52e9838ab08f6f3bda80e7053307482fa5b284b4b6f81ae0389e2a20411fbcc383c878b5309a320946d5849d43fa466e469037b9d5102
6
+ metadata.gz: 34fefa81b79223bf1840481f6ffd232e4d343a1eed9876f8f58893f686c8c4a1c1fbd37679c1a8960bcddef3a992733fe4b53158ee7558c10b49ac42a1793996
7
+ data.tar.gz: d105daf33a577b40e0035bc2a6d512390c361bb42bb9a11c2290e7489f04f813338779b45682969fa7616e8d7bb319db04dfec1eaf624586e50709c4a71eba83
data/.gitlab-ci.yml CHANGED
@@ -2,6 +2,7 @@ image: "ruby:2.7"
2
2
 
3
3
  test:
4
4
  before_script:
5
+ - bundle config set path vendor
5
6
  - bundle install --jobs $(nproc)
6
7
  script:
7
8
  - bundle exec rake test
data/Gemfile.lock CHANGED
@@ -1,16 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-postgresql-csvlog (0.0.2)
4
+ fluent-plugin-postgresql-csvlog (0.1.0)
5
5
  fluentd (>= 1.0, < 2)
6
- pg_query (~> 1.3)
6
+ pg_query (~> 2.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  concurrent-ruby (1.1.8)
12
- cool.io (1.7.0)
13
- fluentd (1.12.0)
12
+ cool.io (1.7.1)
13
+ fluentd (1.12.1)
14
14
  bundler
15
15
  cool.io (>= 1.4.5, < 2.0.0)
16
16
  http_parser.rb (>= 0.5.1, < 0.7.0)
@@ -21,12 +21,14 @@ GEM
21
21
  tzinfo (>= 1.0, < 3.0)
22
22
  tzinfo-data (~> 1.0)
23
23
  yajl-ruby (~> 1.0)
24
+ google-protobuf (3.15.6-universal-darwin)
24
25
  http_parser.rb (0.6.0)
25
26
  msgpack (1.4.2)
26
- pg_query (1.3.0)
27
+ pg_query (2.0.1)
28
+ google-protobuf (~> 3.15.5)
27
29
  power_assert (2.0.0)
28
30
  rake (13.0.3)
29
- serverengine (2.2.2)
31
+ serverengine (2.2.3)
30
32
  sigdump (~> 0.2.2)
31
33
  sigdump (0.2.4)
32
34
  strptime (0.2.5)
data/README.md CHANGED
@@ -51,11 +51,14 @@ ingest and parse PostgreSQL CSV logs:
51
51
 
52
52
  <filter postgres.postgres_csv>
53
53
  @type postgresql_redactor
54
+ key sql
55
+ fingerprint_key fingerprint
54
56
  </filter>
55
57
 
56
58
  <filter postgres.postgres_csv>
57
59
  @type marginalia
58
60
  key sql
61
+ strip_comment true
59
62
  </filter>
60
63
 
61
64
  # Output resulting JSON file to a directory in /tmp
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.0.2'
5
+ s.version = '0.1.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
- s.add_dependency 'pg_query', '~> 1.3'
18
+ s.add_dependency 'pg_query', '~> 2.0'
19
19
 
20
20
  s.add_development_dependency 'rake'
21
21
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -16,9 +16,11 @@ module Fluent
16
16
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
17
17
  config_param :key, :string, default: 'sql'
18
18
 
19
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/).*}m.freeze
20
- MARGINALIA_APPENDED_REGEXP = %r{.*(?<comment>/\*.*\*/)$}m.freeze
21
- MARGINALIA_KEY_VALUE_REGEXP = /^(?<key>.*):?(?<value>.*)$/.freeze
19
+ desc 'Whether to strip the comment from the record specified by key'
20
+ config_param :strip_comment, :bool, default: true
21
+
22
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
22
24
 
23
25
  def filter(_tag, _time, record)
24
26
  parse_comments(record)
@@ -38,15 +40,9 @@ module Fluent
38
40
  return unless comment_match
39
41
 
40
42
  entries = extract_entries(comment_match['comment'])
43
+ parse_entries(entries, record)
41
44
 
42
- entries.each do |component|
43
- data = component.split(':', 2)
44
-
45
- break unless data.length == 2
46
-
47
- stored_key = store_key(record, data[0])
48
- record[stored_key] = data[1]
49
- end
45
+ record[@key] = comment_match['sql'].strip if @strip_comment
50
46
  end
51
47
 
52
48
  def match_marginalia_comment(sql)
@@ -73,7 +69,21 @@ module Fluent
73
69
  comment.gsub!(%r{\*/$}, '')
74
70
  end
75
71
 
72
+ def parse_entries(entries, record)
73
+ entries.each do |component|
74
+ data = component.split(':', 2)
75
+
76
+ break unless data.length == 2
77
+
78
+ stored_key = store_key(record, data[0])
79
+ record[stored_key] = data[1]
80
+ end
81
+ end
82
+
76
83
  def store_key(record, component_key)
84
+ # In case there is a conflict with the Marginalia key
85
+ # (e.g. `correlation_id`), we use the base key
86
+ # (`sql_correlation_id`) instead.
77
87
  if record.key?(component_key)
78
88
  "#{@key}_#{component_key}"
79
89
  else
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'fluent/plugin/filter'
2
4
  require 'pg_query'
3
5
 
@@ -5,9 +7,11 @@ module Fluent::Plugin
5
7
  class PostgreSQLRedactor < Filter
6
8
  Fluent::Plugin.register_filter('postgresql_redactor', self)
7
9
 
8
- def configure(conf)
9
- super
10
- end
10
+ desc 'Field to parse for SQL queries'
11
+ config_param :key, :string, default: 'sql'
12
+
13
+ desc 'Name of field to store SQL query fingerprint'
14
+ config_param :fingerprint_key, :string, default: 'fingerprint'
11
15
 
12
16
  def filter(_tag, _time, record)
13
17
  statement = record['statement']
@@ -15,14 +19,14 @@ module Fluent::Plugin
15
19
  return record unless statement
16
20
 
17
21
  normalized = PgQuery.normalize(statement)
22
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
23
+
18
24
  record.delete('statement')
19
- record['sql'] = normalized
25
+ record[@key] = normalized
20
26
  record.delete('message')
21
27
 
22
28
  record
23
29
  rescue PgQuery::ParseError
24
- # pg_query currently only supports PostgresQL 10:
25
- # https://github.com/lfittl/pg_query/issues/184
26
30
  record['pg_query_error'] = true
27
31
  record
28
32
  end
@@ -29,8 +29,8 @@ class Marginalia < Test::Unit::TestCase
29
29
  inputs.each { |input| d.feed(input) }
30
30
  end
31
31
 
32
- assert_equal(inputs[0].merge, d.filtered[0].last)
33
- assert_equal(inputs[1].merge, d.filtered[1].last)
32
+ assert_equal(inputs[0], d.filtered[0].last)
33
+ assert_equal(inputs[1], d.filtered[1].last)
34
34
  assert_equal(inputs[2].merge(
35
35
  {
36
36
  'application' => 'sidekiq',
@@ -40,6 +40,10 @@ class Marginalia < Test::Unit::TestCase
40
40
  }
41
41
  ),
42
42
  d.filtered[2].last)
43
+
44
+ assert_equal('SELECT * FROM projects', d.filtered[0].last['statement'])
45
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
46
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
43
47
  end
44
48
 
45
49
  test 'parses prepended Marginalia comments' do
@@ -56,7 +60,7 @@ class Marginalia < Test::Unit::TestCase
56
60
  inputs.each { |input| d.feed(input) }
57
61
  end
58
62
 
59
- assert_equal(inputs[0].merge, d.filtered[0].last)
63
+ assert_equal(inputs[0], d.filtered[0].last)
60
64
  assert_equal(inputs[1].merge(
61
65
  {
62
66
  'application' => 'sidekiq',
@@ -72,5 +76,33 @@ class Marginalia < Test::Unit::TestCase
72
76
  }
73
77
  ),
74
78
  d.filtered[2].last)
79
+
80
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[0].last['statement'])
81
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
82
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
83
+ end
84
+
85
+ test 'parses Marginalia comments with strip_comment disabled' do
86
+ d = create_driver(
87
+ <<~CONF
88
+ strip_comment false
89
+ key sql
90
+ CONF
91
+ )
92
+
93
+ sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/)
94
+ appended_sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/')
95
+
96
+ inputs = [
97
+ { 'sql' => sql },
98
+ { 'sql' => appended_sql }
99
+ ]
100
+
101
+ d.run(default_tag: @tag) do
102
+ inputs.each { |input| d.feed(input) }
103
+ end
104
+
105
+ assert_equal(sql, d.filtered[0].last['sql'])
106
+ assert_equal(appended_sql, d.filtered[1].last['sql'])
75
107
  end
76
108
  end
@@ -31,14 +31,14 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
31
31
  inputs.each { |input| d.feed(input) }
32
32
  end
33
33
 
34
- assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
34
+ assert_equal(%w[duration_s fingerprint sql], d.filtered[0].last.keys.sort)
35
35
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
36
36
  end
37
37
 
38
38
  test 'handles parse errors' do
39
39
  d = create_driver
40
40
 
41
- input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
41
+ input = { 'statement' => 'create index something test (bla) include (bar)' }
42
42
 
43
43
  d.run(default_tag: @tag) do
44
44
  d.feed(input)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-20 00:00:00.000000000 Z
11
+ date: 2021-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -36,14 +36,14 @@ dependencies:
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.3'
39
+ version: '2.0'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.3'
46
+ version: '2.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement