fluent-plugin-postgresql-csvlog 0.10.0 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2393ec76502b4127cd874027d99eada9e184dba85edfec3c2a9cfe1a88de54ee
4
- data.tar.gz: 2f61b0b0d4e27a302f90449af833dba0e985406d98f15be773ac379146a958d1
3
+ metadata.gz: af7b81970b58d90863db57c0386b096ca5fe62f4b5c100cd62a6a40b416c15a1
4
+ data.tar.gz: 53fdf00e4405b46f5db1a40abc29a1d8e1ffe2daeb9f3597027f96fb9fb2d1c0
5
5
  SHA512:
6
- metadata.gz: b537e7458f9fa38bb0b88672e137a345739f22ab92f8a3d9fe3cf0e26f9e958995c2f7fdb51afc649ac5f55e608ddd431ef904dd757e651f1b223be58703d7d6
7
- data.tar.gz: 7724c6961993e7e50ef38ebfa22b874fd4578eb8d8057d2cac76fe761f723a5631e1a227c791c7debf24c2631bc4eea1c2febe929f723a5584c52adbb2b91a49
6
+ metadata.gz: f69402f67122706714e07283efc6b131f8c8b4f06851ed77b01d5188d305dd9d57e5e62a3d732cba0272b06db6c5e266d953fb19edb5af27d4396e715ca05be7
7
+ data.tar.gz: ae1f30e585ace5fdba2d81921f22eec5f2a99a0ae3cec8ae71e2e19fa27b89e10f4f81e7057edbea913582920dc3d4bdcb81195a42e13c21077bccd1667da368
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.10.0'
5
+ s.version = '0.11.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'fluent/plugin/filter'
4
- require 'pg_query'
4
+
5
+ require_relative './query_normalizer'
5
6
 
6
7
  module Fluent::Plugin
7
8
  class PostgreSQLRedactor < Filter
@@ -19,32 +20,17 @@ module Fluent::Plugin
19
20
  desc 'Truncate the query if it exceeds the number of bytes'
20
21
  config_param :max_length, :integer, default: 3 * 1024 * 1024
21
22
 
22
- def filter(_tag, _time, record)
23
- statement = record[@input_key]
24
-
25
- return record unless statement
26
-
27
- normalized = PgQuery.normalize(statement)
28
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
29
-
30
- record.delete(@input_key)
31
- record[@output_key] = truncate_query(record, normalized)
23
+ include QueryNormalizer
32
24
 
33
- record
34
- rescue PgQuery::ParseError
35
- record['pg_query_error'] = true
36
- record
37
- end
38
-
39
- private
40
-
41
- def truncate_query(record, normalized)
42
- return normalized if normalized.length < @max_length
43
-
44
- record['truncated_query'] = true
45
- # Assume UTF-8 encoding for PostgreSQL queries
46
- utf8_query = normalized.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
47
- utf8_query[0, @max_length]
25
+ def filter(_tag, _time, record)
26
+ opts = {
27
+ input_key: @input_key,
28
+ output_key: @output_key,
29
+ fingerprint_key: @fingerprint_key,
30
+ max_length: @max_length
31
+ }
32
+
33
+ normalize_and_fingerprint_query(record, opts)
48
34
  end
49
35
  end
50
36
  end
@@ -12,6 +12,8 @@ module Fluent::Plugin
12
12
  # Fingerprints of the queries are also included for easier aggregation
13
13
  class PgStatActivityInput < PollingPostgresInputPlugin
14
14
  include MarginaliaExtractor
15
+ include QueryNormalizer
16
+
15
17
  Fluent::Plugin.register_input('pg_stat_activity', self)
16
18
 
17
19
  ACTIVITY_QUERY = <<-SQL
@@ -42,6 +44,9 @@ module Fluent::Plugin
42
44
  desc 'Name of field to store SQL query fingerprint'
43
45
  config_param :fingerprint_key, :string, default: 'fingerprint'
44
46
 
47
+ desc 'Truncate the query if it exceeds the number of bytes'
48
+ config_param :max_length, :integer, default: 3 * 1024 * 1024
49
+
45
50
  protected
46
51
 
47
52
  def on_poll
@@ -94,31 +99,14 @@ module Fluent::Plugin
94
99
  # Inject marginalia into record
95
100
  parse_marginalia_into_record(record, 'query', true)
96
101
 
97
- # Normalize query and fingerprint
98
- # Note that `record['query']` was updated in previous step
99
- # To strip off marginalia comments
100
- record.merge!(fingerprint_query(record['query']))
101
-
102
- record
103
- end
104
-
105
- def fingerprint_query(query)
106
- # We record the query_length as it will help in understanding whether unparseable
107
- # queries are truncated.
108
- record = { 'query_length' => query&.length, 'query' => nil }
109
-
110
- return record unless query
111
-
112
- normalized = PgQuery.normalize(query)
113
- record['query'] = normalized
114
-
115
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
116
-
117
- record
118
- rescue PgQuery::ParseError
119
- record['query_unparseable'] = true
102
+ opts = {
103
+ input_key: 'query',
104
+ output_key: 'query',
105
+ fingerprint_key: @fingerprint_key,
106
+ max_length: @max_length
107
+ }
120
108
 
121
- record
109
+ normalize_and_fingerprint_query(record, opts)
122
110
  end
123
111
  end
124
112
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative './polling_pg_input_plugin'
4
- require 'pg_query'
4
+ require_relative './query_normalizer'
5
5
 
6
6
  module Fluent::Plugin
7
7
  # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
@@ -19,6 +19,11 @@ module Fluent::Plugin
19
19
  desc 'Name of field to store SQL query fingerprint'
20
20
  config_param :fingerprint_key, :string, default: 'fingerprint'
21
21
 
22
+ desc 'Truncate the query if it exceeds the number of bytes'
23
+ config_param :max_length, :integer, default: 3 * 1024 * 1024
24
+
25
+ include QueryNormalizer
26
+
22
27
  POSTGRES_SERVER_VERSION_QUERY = "SELECT current_setting('server_version_num')"
23
28
 
24
29
  PG12_STAT_STATEMENTS_QUERY = <<-SQL
@@ -56,30 +61,22 @@ module Fluent::Plugin
56
61
 
57
62
  # Returns a fluentd record for a query row
58
63
  def record_for_row(row)
59
- query = row['query']
60
-
61
- # We record the query_length as it will help in understanding whether unparseable
62
- # queries are truncated.
63
64
  record = {
65
+ 'query' => row['query'],
64
66
  'queryid' => row['queryid'].to_s,
65
- 'query_length' => query&.length,
66
67
  'calls' => row['calls']&.to_i,
67
68
  'total_time_ms' => row['total_time']&.to_f,
68
69
  'rows' => row['rows']&.to_i
69
70
  }
70
71
 
71
- return record unless query
72
-
73
- normalized = PgQuery.normalize(query)
74
- record['query'] = normalized
75
-
76
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
77
-
78
- record
79
- rescue PgQuery::ParseError
80
- record['query_unparseable'] = true
72
+ opts = {
73
+ input_key: 'query',
74
+ output_key: 'query',
75
+ fingerprint_key: @fingerprint_key,
76
+ max_length: @max_length
77
+ }
81
78
 
82
- record
79
+ normalize_and_fingerprint_query(record, opts)
83
80
  end
84
81
 
85
82
  # Query the database and emit statements to fluentd router
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pg_query'
4
+
5
+ module Fluent::Plugin
6
+ module QueryNormalizer
7
+ def normalize_and_fingerprint_query(record, opts)
8
+ input_key = opts[:input_key]
9
+ output_key = opts[:output_key]
10
+ fingerprint_key = opts[:fingerprint_key]
11
+ max_length = opts[:max_length]
12
+ statement = record[input_key]
13
+
14
+ return record unless statement
15
+
16
+ # We record the query_length as it will help in understanding whether unparseable
17
+ # queries are truncated.
18
+ record['query_length'] = statement&.length
19
+ normalized = PgQuery.normalize(statement)
20
+
21
+ record.delete(input_key)
22
+ record[fingerprint_key] = PgQuery.fingerprint(normalized) if fingerprint_key
23
+ record[output_key] = truncate_query(record, normalized, max_length)
24
+
25
+ record
26
+ rescue PgQuery::ParseError
27
+ record['query_unparseable'] = true
28
+ record
29
+ end
30
+
31
+ private
32
+
33
+ def truncate_query(record, normalized, max_length)
34
+ return normalized if normalized.length < max_length
35
+
36
+ record['truncated_query'] = true
37
+ # Assume UTF-8 encoding for PostgreSQL queries
38
+ utf8_query = normalized.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
39
+ utf8_query[0, max_length]
40
+ end
41
+ end
42
+ end
@@ -27,7 +27,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
27
27
  inputs.each { |input| d.feed(input) }
28
28
  end
29
29
 
30
- assert_equal(%w[duration_s fingerprint message sql], d.filtered[0].last.keys.sort)
30
+ assert_equal(%w[duration_s fingerprint message query_length sql], d.filtered[0].last.keys.sort)
31
31
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
32
32
  end
33
33
 
@@ -47,7 +47,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
47
47
  inputs.each { |input| d.feed(input) }
48
48
  end
49
49
 
50
- assert_equal(%w[duration_s fingerprint message sql truncated_query], d.filtered[0].last.keys.sort)
50
+ assert_equal(%w[duration_s fingerprint message query_length sql truncated_query], d.filtered[0].last.keys.sort)
51
51
  assert_equal('SELECT * F', d.filtered[0].last['sql'])
52
52
  end
53
53
 
@@ -60,7 +60,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
60
60
  d.feed(input)
61
61
  end
62
62
 
63
- assert_equal(%w[pg_query_error query], d.filtered[0].last.keys.sort)
63
+ assert_equal(%w[query query_length query_unparseable], d.filtered[0].last.keys.sort)
64
64
  assert_equal(input['query'], d.filtered[0].last['query'])
65
65
  end
66
66
 
@@ -83,7 +83,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
83
83
  inputs.each { |input| d.feed(input) }
84
84
  end
85
85
 
86
- assert_equal(%w[duration_s fingerprint message out_sql], d.filtered[0].last.keys.sort)
86
+ assert_equal(%w[duration_s fingerprint message out_sql query_length], d.filtered[0].last.keys.sort)
87
87
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['out_sql'])
88
88
  end
89
89
  end
@@ -109,7 +109,6 @@ class PgStatActivityInputTest < Test::Unit::TestCase
109
109
  'pid' => nil,
110
110
  'query' => nil,
111
111
  'query_age_s' => nil,
112
- 'query_length' => nil,
113
112
  'query_start' => nil,
114
113
  'state' => nil,
115
114
  'state_age_s' => nil,
@@ -159,7 +158,7 @@ class PgStatActivityInputTest < Test::Unit::TestCase
159
158
  'datid' => 16384,
160
159
  'datname' => 'testuser',
161
160
  'pid' => 376,
162
- 'query' => nil,
161
+ 'query' => "SELECT * FROM users WHERE user_se=",
163
162
  'query_age_s' => 0.001894,
164
163
  'query_length' => 34,
165
164
  'query_start' => '2021-07-23T12:55:25.000+00:00',
@@ -77,8 +77,8 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
77
77
  record = d.instance.record_for_row(row)
78
78
 
79
79
  expected = {
80
- 'query_length' => nil,
81
80
  'queryid' => '1234',
81
+ 'query' => nil,
82
82
  'calls' => nil,
83
83
  'rows' => nil,
84
84
  'total_time_ms' => nil
@@ -153,6 +153,7 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
153
153
  expected = {
154
154
  'query_length' => 13,
155
155
  'query_unparseable' => true,
156
+ 'query' => 'SELECT * FROM',
156
157
  'queryid' => '1234',
157
158
  'calls' => nil,
158
159
  'rows' => nil,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-07 00:00:00.000000000 Z
11
+ date: 2024-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -111,6 +111,7 @@ files:
111
111
  - lib/fluent/plugin/marginalia_extractor.rb
112
112
  - lib/fluent/plugin/parser_multiline_csv.rb
113
113
  - lib/fluent/plugin/polling_pg_input_plugin.rb
114
+ - lib/fluent/plugin/query_normalizer.rb
114
115
  - sql/create_extension.sql
115
116
  - test/helper.rb
116
117
  - test/plugin/itest_in_pg_stat_activity.rb