fluent-plugin-postgresql-csvlog 0.10.0 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2393ec76502b4127cd874027d99eada9e184dba85edfec3c2a9cfe1a88de54ee
4
- data.tar.gz: 2f61b0b0d4e27a302f90449af833dba0e985406d98f15be773ac379146a958d1
3
+ metadata.gz: af7b81970b58d90863db57c0386b096ca5fe62f4b5c100cd62a6a40b416c15a1
4
+ data.tar.gz: 53fdf00e4405b46f5db1a40abc29a1d8e1ffe2daeb9f3597027f96fb9fb2d1c0
5
5
  SHA512:
6
- metadata.gz: b537e7458f9fa38bb0b88672e137a345739f22ab92f8a3d9fe3cf0e26f9e958995c2f7fdb51afc649ac5f55e608ddd431ef904dd757e651f1b223be58703d7d6
7
- data.tar.gz: 7724c6961993e7e50ef38ebfa22b874fd4578eb8d8057d2cac76fe761f723a5631e1a227c791c7debf24c2631bc4eea1c2febe929f723a5584c52adbb2b91a49
6
+ metadata.gz: f69402f67122706714e07283efc6b131f8c8b4f06851ed77b01d5188d305dd9d57e5e62a3d732cba0272b06db6c5e266d953fb19edb5af27d4396e715ca05be7
7
+ data.tar.gz: ae1f30e585ace5fdba2d81921f22eec5f2a99a0ae3cec8ae71e2e19fa27b89e10f4f81e7057edbea913582920dc3d4bdcb81195a42e13c21077bccd1667da368
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.10.0'
5
+ s.version = '0.11.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'fluent/plugin/filter'
4
- require 'pg_query'
4
+
5
+ require_relative './query_normalizer'
5
6
 
6
7
  module Fluent::Plugin
7
8
  class PostgreSQLRedactor < Filter
@@ -19,32 +20,17 @@ module Fluent::Plugin
19
20
  desc 'Truncate the query if it exceeds the number of bytes'
20
21
  config_param :max_length, :integer, default: 3 * 1024 * 1024
21
22
 
22
- def filter(_tag, _time, record)
23
- statement = record[@input_key]
24
-
25
- return record unless statement
26
-
27
- normalized = PgQuery.normalize(statement)
28
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
29
-
30
- record.delete(@input_key)
31
- record[@output_key] = truncate_query(record, normalized)
23
+ include QueryNormalizer
32
24
 
33
- record
34
- rescue PgQuery::ParseError
35
- record['pg_query_error'] = true
36
- record
37
- end
38
-
39
- private
40
-
41
- def truncate_query(record, normalized)
42
- return normalized if normalized.length < @max_length
43
-
44
- record['truncated_query'] = true
45
- # Assume UTF-8 encoding for PostgreSQL queries
46
- utf8_query = normalized.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
47
- utf8_query[0, @max_length]
25
+ def filter(_tag, _time, record)
26
+ opts = {
27
+ input_key: @input_key,
28
+ output_key: @output_key,
29
+ fingerprint_key: @fingerprint_key,
30
+ max_length: @max_length
31
+ }
32
+
33
+ normalize_and_fingerprint_query(record, opts)
48
34
  end
49
35
  end
50
36
  end
@@ -12,6 +12,8 @@ module Fluent::Plugin
12
12
  # Fingerprints of the queries are also included for easier aggregation
13
13
  class PgStatActivityInput < PollingPostgresInputPlugin
14
14
  include MarginaliaExtractor
15
+ include QueryNormalizer
16
+
15
17
  Fluent::Plugin.register_input('pg_stat_activity', self)
16
18
 
17
19
  ACTIVITY_QUERY = <<-SQL
@@ -42,6 +44,9 @@ module Fluent::Plugin
42
44
  desc 'Name of field to store SQL query fingerprint'
43
45
  config_param :fingerprint_key, :string, default: 'fingerprint'
44
46
 
47
+ desc 'Truncate the query if it exceeds the number of bytes'
48
+ config_param :max_length, :integer, default: 3 * 1024 * 1024
49
+
45
50
  protected
46
51
 
47
52
  def on_poll
@@ -94,31 +99,14 @@ module Fluent::Plugin
94
99
  # Inject marginalia into record
95
100
  parse_marginalia_into_record(record, 'query', true)
96
101
 
97
- # Normalize query and fingerprint
98
- # Note that `record['query']` was updated in previous step
99
- # To strip off marginalia comments
100
- record.merge!(fingerprint_query(record['query']))
101
-
102
- record
103
- end
104
-
105
- def fingerprint_query(query)
106
- # We record the query_length as it will help in understanding whether unparseable
107
- # queries are truncated.
108
- record = { 'query_length' => query&.length, 'query' => nil }
109
-
110
- return record unless query
111
-
112
- normalized = PgQuery.normalize(query)
113
- record['query'] = normalized
114
-
115
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
116
-
117
- record
118
- rescue PgQuery::ParseError
119
- record['query_unparseable'] = true
102
+ opts = {
103
+ input_key: 'query',
104
+ output_key: 'query',
105
+ fingerprint_key: @fingerprint_key,
106
+ max_length: @max_length
107
+ }
120
108
 
121
- record
109
+ normalize_and_fingerprint_query(record, opts)
122
110
  end
123
111
  end
124
112
  end
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative './polling_pg_input_plugin'
4
- require 'pg_query'
4
+ require_relative './query_normalizer'
5
5
 
6
6
  module Fluent::Plugin
7
7
  # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
@@ -19,6 +19,11 @@ module Fluent::Plugin
19
19
  desc 'Name of field to store SQL query fingerprint'
20
20
  config_param :fingerprint_key, :string, default: 'fingerprint'
21
21
 
22
+ desc 'Truncate the query if it exceeds the number of bytes'
23
+ config_param :max_length, :integer, default: 3 * 1024 * 1024
24
+
25
+ include QueryNormalizer
26
+
22
27
  POSTGRES_SERVER_VERSION_QUERY = "SELECT current_setting('server_version_num')"
23
28
 
24
29
  PG12_STAT_STATEMENTS_QUERY = <<-SQL
@@ -56,30 +61,22 @@ module Fluent::Plugin
56
61
 
57
62
  # Returns a fluentd record for a query row
58
63
  def record_for_row(row)
59
- query = row['query']
60
-
61
- # We record the query_length as it will help in understanding whether unparseable
62
- # queries are truncated.
63
64
  record = {
65
+ 'query' => row['query'],
64
66
  'queryid' => row['queryid'].to_s,
65
- 'query_length' => query&.length,
66
67
  'calls' => row['calls']&.to_i,
67
68
  'total_time_ms' => row['total_time']&.to_f,
68
69
  'rows' => row['rows']&.to_i
69
70
  }
70
71
 
71
- return record unless query
72
-
73
- normalized = PgQuery.normalize(query)
74
- record['query'] = normalized
75
-
76
- record[@fingerprint_key] = PgQuery.fingerprint(normalized) if @fingerprint_key
77
-
78
- record
79
- rescue PgQuery::ParseError
80
- record['query_unparseable'] = true
72
+ opts = {
73
+ input_key: 'query',
74
+ output_key: 'query',
75
+ fingerprint_key: @fingerprint_key,
76
+ max_length: @max_length
77
+ }
81
78
 
82
- record
79
+ normalize_and_fingerprint_query(record, opts)
83
80
  end
84
81
 
85
82
  # Query the database and emit statements to fluentd router
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pg_query'
4
+
5
+ module Fluent::Plugin
6
+ module QueryNormalizer
7
+ def normalize_and_fingerprint_query(record, opts)
8
+ input_key = opts[:input_key]
9
+ output_key = opts[:output_key]
10
+ fingerprint_key = opts[:fingerprint_key]
11
+ max_length = opts[:max_length]
12
+ statement = record[input_key]
13
+
14
+ return record unless statement
15
+
16
+ # We record the query_length as it will help in understanding whether unparseable
17
+ # queries are truncated.
18
+ record['query_length'] = statement&.length
19
+ normalized = PgQuery.normalize(statement)
20
+
21
+ record.delete(input_key)
22
+ record[fingerprint_key] = PgQuery.fingerprint(normalized) if fingerprint_key
23
+ record[output_key] = truncate_query(record, normalized, max_length)
24
+
25
+ record
26
+ rescue PgQuery::ParseError
27
+ record['query_unparseable'] = true
28
+ record
29
+ end
30
+
31
+ private
32
+
33
+ def truncate_query(record, normalized, max_length)
34
+ return normalized if normalized.length < max_length
35
+
36
+ record['truncated_query'] = true
37
+ # Assume UTF-8 encoding for PostgreSQL queries
38
+ utf8_query = normalized.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
39
+ utf8_query[0, max_length]
40
+ end
41
+ end
42
+ end
@@ -27,7 +27,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
27
27
  inputs.each { |input| d.feed(input) }
28
28
  end
29
29
 
30
- assert_equal(%w[duration_s fingerprint message sql], d.filtered[0].last.keys.sort)
30
+ assert_equal(%w[duration_s fingerprint message query_length sql], d.filtered[0].last.keys.sort)
31
31
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['sql'])
32
32
  end
33
33
 
@@ -47,7 +47,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
47
47
  inputs.each { |input| d.feed(input) }
48
48
  end
49
49
 
50
- assert_equal(%w[duration_s fingerprint message sql truncated_query], d.filtered[0].last.keys.sort)
50
+ assert_equal(%w[duration_s fingerprint message query_length sql truncated_query], d.filtered[0].last.keys.sort)
51
51
  assert_equal('SELECT * F', d.filtered[0].last['sql'])
52
52
  end
53
53
 
@@ -60,7 +60,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
60
60
  d.feed(input)
61
61
  end
62
62
 
63
- assert_equal(%w[pg_query_error query], d.filtered[0].last.keys.sort)
63
+ assert_equal(%w[query query_length query_unparseable], d.filtered[0].last.keys.sort)
64
64
  assert_equal(input['query'], d.filtered[0].last['query'])
65
65
  end
66
66
 
@@ -83,7 +83,7 @@ class PostgreSQLRedactorTest < Test::Unit::TestCase
83
83
  inputs.each { |input| d.feed(input) }
84
84
  end
85
85
 
86
- assert_equal(%w[duration_s fingerprint message out_sql], d.filtered[0].last.keys.sort)
86
+ assert_equal(%w[duration_s fingerprint message out_sql query_length], d.filtered[0].last.keys.sort)
87
87
  assert_equal('SELECT * FROM projects WHERE id = $1', d.filtered[0].last['out_sql'])
88
88
  end
89
89
  end
@@ -109,7 +109,6 @@ class PgStatActivityInputTest < Test::Unit::TestCase
109
109
  'pid' => nil,
110
110
  'query' => nil,
111
111
  'query_age_s' => nil,
112
- 'query_length' => nil,
113
112
  'query_start' => nil,
114
113
  'state' => nil,
115
114
  'state_age_s' => nil,
@@ -159,7 +158,7 @@ class PgStatActivityInputTest < Test::Unit::TestCase
159
158
  'datid' => 16384,
160
159
  'datname' => 'testuser',
161
160
  'pid' => 376,
162
- 'query' => nil,
161
+ 'query' => "SELECT * FROM users WHERE user_se=",
163
162
  'query_age_s' => 0.001894,
164
163
  'query_length' => 34,
165
164
  'query_start' => '2021-07-23T12:55:25.000+00:00',
@@ -77,8 +77,8 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
77
77
  record = d.instance.record_for_row(row)
78
78
 
79
79
  expected = {
80
- 'query_length' => nil,
81
80
  'queryid' => '1234',
81
+ 'query' => nil,
82
82
  'calls' => nil,
83
83
  'rows' => nil,
84
84
  'total_time_ms' => nil
@@ -153,6 +153,7 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
153
153
  expected = {
154
154
  'query_length' => 13,
155
155
  'query_unparseable' => true,
156
+ 'query' => 'SELECT * FROM',
156
157
  'queryid' => '1234',
157
158
  'calls' => nil,
158
159
  'rows' => nil,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.0
4
+ version: 0.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-07 00:00:00.000000000 Z
11
+ date: 2024-10-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -111,6 +111,7 @@ files:
111
111
  - lib/fluent/plugin/marginalia_extractor.rb
112
112
  - lib/fluent/plugin/parser_multiline_csv.rb
113
113
  - lib/fluent/plugin/polling_pg_input_plugin.rb
114
+ - lib/fluent/plugin/query_normalizer.rb
114
115
  - sql/create_extension.sql
115
116
  - test/helper.rb
116
117
  - test/plugin/itest_in_pg_stat_activity.rb