fluent-plugin-postgresql-csvlog 0.0.2 → 0.1.0

This diff shows the changes between two publicly available versions of the package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 002b7169e3f3ac6493eb09dca5c2e3820a944f38c64a0b45641f3cdbd1717ddf
4
- data.tar.gz: eac3646a2404665924c4fa30114b91ecce49dbadef3919d89eab458450568328
3
+ metadata.gz: 3f63769c3a4c78fc3db074b812e6f75e77c46b355563b403c3de620c49935392
4
+ data.tar.gz: 2fd8d1acd1fac5ce24b72ed4d488fc5f80f0a8752084784cf16f419389f34499
5
5
  SHA512:
6
- metadata.gz: 354478f3573f0934dcee72305069aa8f25333087d1667fc4e127963b8f8f955b66a02462925e4672f27f80e43b772640a68a4760266f461704bd9486ad66a3ef
7
- data.tar.gz: f6d89c2db73d337b1aa52e9838ab08f6f3bda80e7053307482fa5b284b4b6f81ae0389e2a20411fbcc383c878b5309a320946d5849d43fa466e469037b9d5102
6
+ metadata.gz: 34fefa81b79223bf1840481f6ffd232e4d343a1eed9876f8f58893f686c8c4a1c1fbd37679c1a8960bcddef3a992733fe4b53158ee7558c10b49ac42a1793996
7
+ data.tar.gz: d105daf33a577b40e0035bc2a6d512390c361bb42bb9a11c2290e7489f04f813338779b45682969fa7616e8d7bb319db04dfec1eaf624586e50709c4a71eba83
data/.gitlab-ci.yml CHANGED
@@ -2,6 +2,7 @@ image: "ruby:2.7"
2
2
 
3
3
  test:
4
4
  before_script:
5
+ - bundle config set path vendor
5
6
  - bundle install --jobs $(nproc)
6
7
  script:
7
8
  - bundle exec rake test
data/Gemfile.lock CHANGED
@@ -1,16 +1,16 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- fluent-plugin-postgresql-csvlog (0.0.2)
4
+ fluent-plugin-postgresql-csvlog (0.1.0)
5
5
  fluentd (>= 1.0, < 2)
6
- pg_query (~> 1.3)
6
+ pg_query (~> 2.0)
7
7
 
8
8
  GEM
9
9
  remote: https://rubygems.org/
10
10
  specs:
11
11
  concurrent-ruby (1.1.8)
12
- cool.io (1.7.0)
13
- fluentd (1.12.0)
12
+ cool.io (1.7.1)
13
+ fluentd (1.12.1)
14
14
  bundler
15
15
  cool.io (>= 1.4.5, < 2.0.0)
16
16
  http_parser.rb (>= 0.5.1, < 0.7.0)
@@ -21,12 +21,14 @@ GEM
21
21
  tzinfo (>= 1.0, < 3.0)
22
22
  tzinfo-data (~> 1.0)
23
23
  yajl-ruby (~> 1.0)
24
+ google-protobuf (3.15.6-universal-darwin)
24
25
  http_parser.rb (0.6.0)
25
26
  msgpack (1.4.2)
26
- pg_query (1.3.0)
27
+ pg_query (2.0.1)
28
+ google-protobuf (~> 3.15.5)
27
29
  power_assert (2.0.0)
28
30
  rake (13.0.3)
29
- serverengine (2.2.2)
31
+ serverengine (2.2.3)
30
32
  sigdump (~> 0.2.2)
31
33
  sigdump (0.2.4)
32
34
  strptime (0.2.5)
data/README.md CHANGED
@@ -51,11 +51,14 @@ ingest and parse PostgreSQL CSV logs:
51
51
 
52
52
  <filter postgres.postgres_csv>
53
53
  @type postgresql_redactor
54
+ key sql
55
+ fingerprint_key fingerprint
54
56
  </filter>
55
57
 
56
58
  <filter postgres.postgres_csv>
57
59
  @type marginalia
58
60
  key sql
61
+ strip_comment true
59
62
  </filter>
60
63
 
61
64
  # Output resulting JSON file to a directory in /tmp
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.0.2'
5
+ s.version = '0.1.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
@@ -15,7 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
- s.add_dependency 'pg_query', '~> 1.3'
18
+ s.add_dependency 'pg_query', '~> 2.0'
19
19
 
20
20
  s.add_development_dependency 'rake'
21
21
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -16,9 +16,11 @@ module Fluent
16
16
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
17
17
  config_param :key, :string, default: 'sql'
18
18
 
19
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/).*}m.freeze
20
- MARGINALIA_APPENDED_REGEXP = %r{.*(?<comment>/\*.*\*/)$}m.freeze
21
- MARGINALIA_KEY_VALUE_REGEXP = /^(?<key>.*):?(?<value>.*)$/.freeze
19
+ desc 'Whether to strip the comment from the record specified by key'
20
+ config_param :strip_comment, :bool, default: true
21
+
22
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
22
24
 
23
25
  def filter(_tag, _time, record)
24
26
  parse_comments(record)
@@ -38,15 +40,9 @@ module Fluent
38
40
  return unless comment_match
39
41
 
40
42
  entries = extract_entries(comment_match['comment'])
43
+ parse_entries(entries, record)
41
44
 
42
- entries.each do |component|
43
- data = component.split(':', 2)
44
-
45
- break unless data.length == 2
46
-
47
- stored_key = store_key(record, data[0])
48
- record[stored_key] = data[1]
49
- end
45
+ record[@key] = comment_match['sql'].strip if @strip_comment
50
46
  end
51
47
 
52
48
  def match_marginalia_comment(sql)
@@ -73,7 +69,21 @@ module Fluent
73
69
  comment.gsub!(%r{\*/$}, '')
74
70
  end
75
71
 
72
+ def parse_entries(entries, record)
73
+ entries.each do |component|
74
+ data = component.split(':', 2)
75
+
76
+ break unless data.length == 2
77
+
78
+ stored_key = store_key(record, data[0])
79
+ record[stored_key] = data[1]
80
+ end
81
+ end
82
+
76
83
  def store_key(record, component_key)
84
+ # In case there is a conflict with the Marginalia key
85
+ # (e.g. `correlation_id`), we use the base key
86
+ # (`sql_correlation_id`) instead.
77
87
  if record.key?(component_key)
78
88
  "#{@key}_#{component_key}"
79
89
  else
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'fluent/plugin/filter'
2
4
  require 'pg_query'
3
5
 
@@ -5,9 +7,11 @@ module Fluent::Plugin
5
7
  class PostgreSQLRedactor < Filter
6
8
  Fluent::Plugin.register_filter('postgresql_redactor', self)
7
9
 
8
- def configure(conf)
9
- super
10
- end
10
+ desc 'Field to parse for SQL queries'
11
+ config_param :key, :string, default: 'sql'
12
+
13
+ desc 'Name of field to store SQL query fingerprint'
14
+ config_param :fingerprint_key, :string, default: 'fingerprint'
11
15
 
12
16
  def filter(_tag, _time, record)
13
17
  statement = record['statement']
@@ -15,14 +19,14 @@ module Fluent::Plugin
15
19
  return record unless statement
16
20
 
17
21
  normalized = PgQuery.normalize(statement)
22
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
23
+
18
24
  record.delete('statement')
19
- record['sql'] = normalized
25
+ record[@key] = normalized
20
26
  record.delete('message')
21
27
 
22
28
  record
23
29
  rescue PgQuery::ParseError
24
- # pg_query currently only supports PostgresQL 10:
25
- # https://github.com/lfittl/pg_query/issues/184
26
30
  record['pg_query_error'] = true
27
31
  record
28
32
  end
@@ -29,8 +29,8 @@ class Marginalia < Test::Unit::TestCase
29
29
  inputs.each { |input| d.feed(input) }
30
30
  end
31
31
 
32
- assert_equal(inputs[0].merge, d.filtered[0].last)
33
- assert_equal(inputs[1].merge, d.filtered[1].last)
32
+ assert_equal(inputs[0], d.filtered[0].last)
33
+ assert_equal(inputs[1], d.filtered[1].last)
34
34
  assert_equal(inputs[2].merge(
35
35
  {
36
36
  'application' => 'sidekiq',
@@ -40,6 +40,10 @@ class Marginalia < Test::Unit::TestCase
40
40
  }
41
41
  ),
42
42
  d.filtered[2].last)
43
+
44
+ assert_equal('SELECT * FROM projects', d.filtered[0].last['statement'])
45
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
46
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
43
47
  end
44
48
 
45
49
  test 'parses prepended Marginalia comments' do
@@ -56,7 +60,7 @@ class Marginalia < Test::Unit::TestCase
56
60
  inputs.each { |input| d.feed(input) }
57
61
  end
58
62
 
59
- assert_equal(inputs[0].merge, d.filtered[0].last)
63
+ assert_equal(inputs[0], d.filtered[0].last)
60
64
  assert_equal(inputs[1].merge(
61
65
  {
62
66
  'application' => 'sidekiq',
@@ -72,5 +76,33 @@ class Marginalia < Test::Unit::TestCase
72
76
  }
73
77
  ),
74
78
  d.filtered[2].last)
79
+
80
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[0].last['statement'])
81
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[1].last['statement'])
82
+ assert_equal('SELECT COUNT(*) FROM "projects"', d.filtered[2].last['statement'])
83
+ end
84
+
85
+ test 'parses Marginalia comments with strip_comment disabled' do
86
+ d = create_driver(
87
+ <<~CONF
88
+ strip_comment false
89
+ key sql
90
+ CONF
91
+ )
92
+
93
+ sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/)
94
+ appended_sql = %(SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/')
95
+
96
+ inputs = [
97
+ { 'sql' => sql },
98
+ { 'sql' => appended_sql }
99
+ ]
100
+
101
+ d.run(default_tag: @tag) do
102
+ inputs.each { |input| d.feed(input) }
103
+ end
104
+
105
+ assert_equal(sql, d.filtered[0].last['sql'])
106
+ assert_equal(appended_sql, d.filtered[1].last['sql'])
75
107
  end
76
108
  end
@@ -31,14 +31,14 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
31
31
  inputs.each { |input| d.feed(input) }
32
32
  end
33
33
 
34
- assert_equal(%w[duration_s sql], d.filtered[0].last.keys.sort)
34
+ assert_equal(%w[duration_s fingerprint sql], d.filtered[0].last.keys.sort)
35
35
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
36
36
  end
37
37
 
38
38
  test 'handles parse errors' do
39
39
  d = create_driver
40
40
 
41
- input = { 'statement' => 'create index concurrently foo on test (bla) include (bar)' }
41
+ input = { 'statement' => 'create index something test (bla) include (bar)' }
42
42
 
43
43
  d.run(default_tag: @tag) do
44
44
  d.feed(input)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-02-20 00:00:00.000000000 Z
11
+ date: 2021-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -36,14 +36,14 @@ dependencies:
36
36
  requirements:
37
37
  - - "~>"
38
38
  - !ruby/object:Gem::Version
39
- version: '1.3'
39
+ version: '2.0'
40
40
  type: :runtime
41
41
  prerelease: false
42
42
  version_requirements: !ruby/object:Gem::Requirement
43
43
  requirements:
44
44
  - - "~>"
45
45
  - !ruby/object:Gem::Version
46
- version: '1.3'
46
+ version: '2.0'
47
47
  - !ruby/object:Gem::Dependency
48
48
  name: rake
49
49
  requirement: !ruby/object:Gem::Requirement