fluent-plugin-postgresql-csvlog 0.4.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1cc2264997378ec801c6623768d57ca27ce1c28d6ac8ffcb6a905add2ce9f16
4
- data.tar.gz: 79aab5d187209db5790f9bca181ea0ca8c909c808809590ad46cef0f6645902c
3
+ metadata.gz: 5fd63aaf4685f342a67b1ae1393d6f0ab7c4fab2d76b0e1b8f65a20d544c7fbf
4
+ data.tar.gz: faca443f2ff5b0bba6baa960918619e21ba03d86055cbb6a38d27278e56d903c
5
5
  SHA512:
6
- metadata.gz: e38e47908ac33706c4b6679fabc97a395bb63f8a110cb6a836c93f984dbfaf1f77f2204209cfe8a0609ddf969ab960cd855ce363398388084a0026e5df0c4970
7
- data.tar.gz: 94505a6d7ceee368c22b0c32081b85aaf215d28217ceed1ff8f58bab840367f5afe19ed725056394e57fd1d6ebfe3ae98fba66b39eb781cfa2f1f80a602385c6
6
+ metadata.gz: de5e28ed56dc6fcfeb7f0631538e796561556cadd3d5b03a3510cd4f575291c51bf9c335d1f49415569a7c8bdd217e1cb22a411258fe6cbeed023cad15cb2232
7
+ data.tar.gz: 176678ab47f25823b1dbce6bec33e4d86ee5c02ae72a7370541d399d91a0aa79cd9b575fb4a75a3fe53f0cf4400b7b55cd0102e071de7c52279e6b936f03ec20
data/.gitlab-ci.yml CHANGED
@@ -10,10 +10,9 @@ test:
10
10
  paths:
11
11
  - vendor/ruby
12
12
 
13
- # integration tests
14
- itest:
13
+ .iteration_test:
15
14
  services:
16
- - name: postgres:12
15
+ - name: postgres:$POSTGRES_SERVER_VERSION
17
16
  alias: postgres
18
17
  command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
19
18
  variables:
@@ -28,6 +27,18 @@ itest:
28
27
  paths:
29
28
  - vendor/ruby
30
29
 
30
+ # integration tests for postgres 12
31
+ itest_pg12:
32
+ extends: .iteration_test
33
+ variables:
34
+ POSTGRES_SERVER_VERSION: 12
35
+
36
+ # integration tests for postgres 13
37
+ itest_pg13:
38
+ extends: .iteration_test
39
+ variables:
40
+ POSTGRES_SERVER_VERSION: 13
41
+
31
42
  end_to_end_verification_test:
32
43
  image: docker:19.03.12
33
44
  services:
data/README.md CHANGED
@@ -80,7 +80,8 @@ ingest and parse PostgreSQL CSV logs:
80
80
  To develop and debug locally, there is a `Dockerfile` and `docker-compose.yml` that will setup a local environment,
81
81
  complete with Postgres, suitable for testing purposes.
82
82
 
83
- 1. `docker compose rm verifier --rm` - test the current configuration
83
+ 1. `docker compose build` - build the current configuration
84
+ 1. `docker compose run --rm verifier` - test the current configuration
84
85
  1. `docker compose up`
85
86
 
86
87
  ### Releasing a new version
data/docker-compose.yml CHANGED
@@ -2,7 +2,7 @@
2
2
  version: "3.3"
3
3
  services:
4
4
  postgres:
5
- image: postgres
5
+ image: postgres:13
6
6
  restart: "no"
7
7
  environment:
8
8
  - POSTGRES_USER=testuser
data/example-fluentd.conf CHANGED
@@ -13,6 +13,10 @@
13
13
  time_slice_format %Y%m%d%H%M%S
14
14
  flush_interval 1s
15
15
  utc
16
+
17
+ <format>
18
+ @type json
19
+ </format>
16
20
  </match>
17
21
 
18
22
  <source>
@@ -23,3 +27,15 @@
23
27
  password testpass
24
28
  interval 1
25
29
  </source>
30
+
31
+ <match postgres.pg_stat_activity>
32
+ @type file
33
+ path /var/log/pg/pg_stat_activity
34
+ time_slice_format %Y%m%d%H%M%S
35
+ flush_interval 1s
36
+ utc
37
+ <format>
38
+ @type json
39
+ </format>
40
+ </match>
41
+
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.4.0'
5
+ s.version = '0.7.1'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
18
  s.add_dependency 'pg', '~> 1.1'
19
- s.add_dependency 'pg_query', '~> 2.1'
19
+ s.add_dependency 'pg_query', '~> 2.0'
20
20
 
21
21
  s.add_development_dependency 'rake'
22
22
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -11,7 +11,7 @@ module Fluent::Plugin
11
11
  # 'fingerprint' => '8a6e9896bd9048a2',
12
12
  # 'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
13
13
  # 'query_length' => 58,
14
- # 'queryid' => 3239318621761098074
14
+ # 'queryid' => '3239318621761098074'
15
15
  # }
16
16
  class PgStatStatementsInput < PollingPostgresInputPlugin
17
17
  Fluent::Plugin.register_input('pg_stat_statements', self)
@@ -19,6 +19,26 @@ module Fluent::Plugin
19
19
  desc 'Name of field to store SQL query fingerprint'
20
20
  config_param :fingerprint_key, :string, default: 'fingerprint'
21
21
 
22
+ POSTGRES_SERVER_VERSION_QUERY = "SELECT current_setting('server_version_num')"
23
+
24
+ PG12_STAT_STATEMENTS_QUERY = <<-SQL
25
+ SELECT queryid,
26
+ query,
27
+ calls,
28
+ rows,
29
+ total_time
30
+ FROM public.pg_stat_statements
31
+ SQL
32
+
33
+ PG13_STAT_STATEMENTS_QUERY = <<-SQL
34
+ SELECT queryid,
35
+ query,
36
+ calls,
37
+ rows,
38
+ (total_plan_time + total_exec_time) total_time
39
+ FROM public.pg_stat_statements
40
+ SQL
41
+
22
42
  protected
23
43
 
24
44
  def on_poll
@@ -29,13 +49,24 @@ module Fluent::Plugin
29
49
 
30
50
  public
31
51
 
52
+ def initialize
53
+ super
54
+ @postgres_server_version_num = nil
55
+ end
56
+
32
57
  # Returns a fluentd record for a query row
33
58
  def record_for_row(row)
34
59
  query = row['query']
35
60
 
36
61
  # We record the query_length as it will help in understanding whether unparseable
37
62
  # queries are truncated.
38
- record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
63
+ record = {
64
+ 'queryid' => row['queryid'].to_s,
65
+ 'query_length' => query&.length,
66
+ 'calls' => row['calls']&.to_i,
67
+ 'total_time_ms' => row['total_time']&.to_f,
68
+ 'rows' => row['rows']&.to_i
69
+ }
39
70
 
40
71
  return record unless query
41
72
 
@@ -56,12 +87,32 @@ module Fluent::Plugin
56
87
  me = Fluent::MultiEventStream.new
57
88
 
58
89
  now = Fluent::Engine.now
59
- conn.exec('SELECT queryid, query FROM public.pg_stat_statements').each do |row|
90
+
91
+ query = query_for_postgres_version(conn)
92
+
93
+ conn.exec(query).each do |row|
60
94
  record = record_for_row(row)
61
95
  me.add(now, record)
62
96
  end
63
97
 
64
98
  @router.emit_stream(@tag, me)
65
99
  end
100
+
101
+ # Returns the PG_VERSION_NUM value from the database
102
+ # will memoize the result
103
+ def postgres_server_version_num(conn)
104
+ return @postgres_server_version_num if @postgres_server_version_num
105
+
106
+ @postgres_server_version_num = conn.exec(POSTGRES_SERVER_VERSION_QUERY).getvalue(0,0).to_i
107
+ end
108
+
109
+ # pg_stat_statements columns changed in pg13, so we use different queries depending on the version
110
+ # https://www.postgresql.org/docs/12/pgstatstatements.html
111
+ # https://www.postgresql.org/docs/13/pgstatstatements.html
112
+ def query_for_postgres_version(conn)
113
+ return PG13_STAT_STATEMENTS_QUERY if postgres_server_version_num(conn) >= 13_00_00
114
+
115
+ PG12_STAT_STATEMENTS_QUERY
116
+ end
66
117
  end
67
118
  end
@@ -7,7 +7,7 @@ module Fluent::Plugin
7
7
  # utility method, useful for extracting marginalia into fluentd records
8
8
  module MarginaliaExtractor
9
9
  MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
10
- MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
10
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)\s*;?\s*$}m.freeze
11
11
 
12
12
  # Injects marginalia into a fluentd record
13
13
  def parse_marginalia_into_record(record, key, strip_comment)
@@ -60,7 +60,7 @@ module Fluent::Plugin
60
60
  on_poll
61
61
  rescue StandardError => e
62
62
  log.error 'unexpected error', error: e.message, error_class: e.class
63
- log.error_backtrace e.backtrace
63
+ log.error_backtrace
64
64
  end
65
65
  end
66
66
  end
@@ -98,8 +98,7 @@ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
98
98
  expected_record = {
99
99
  'fingerprint' => '8a6e9896bd9048a2',
100
100
  'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
101
- 'query_length' => 58,
102
- 'queryid' => 3_239_318_621_761_098_074
101
+ 'query_length' => 58
103
102
  }
104
103
  known_statement_event = emits.find do |event|
105
104
  record = event[2]
@@ -114,7 +113,8 @@ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
114
113
  assert_equal 'postgres.pg_stat_statements', tag
115
114
  assert_equal expected_record['fingerprint'], record['fingerprint']
116
115
  assert_equal expected_record['query_length'], record['query_length']
117
- assert_true expected_record.include? 'queryid'
116
+ assert_true record.include? 'queryid'
117
+ assert_true record['queryid'].is_a? String
118
118
  end
119
119
  end
120
120
  end
@@ -42,13 +42,24 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
42
42
  sub_test_case 'execution' do
43
43
  test 'sql' do
44
44
  d = create_driver
45
- record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
45
+ row = {
46
+ 'queryid' => '1234',
47
+ 'query' => 'SELECT * FROM users WHERE user_id = ?',
48
+ 'calls' => 22,
49
+ 'rows' => 333,
50
+ 'total_time' => 44.44
51
+ }
52
+
53
+ record = d.instance.record_for_row(row)
46
54
 
47
55
  expected = {
48
56
  'fingerprint' => 'c071dee80d466e7d',
49
57
  'query' => 'SELECT * FROM users WHERE user_id = ?',
50
58
  'query_length' => 37,
51
- 'queryid' => '1234'
59
+ 'queryid' => '1234',
60
+ 'calls' => 22,
61
+ 'rows' => 333,
62
+ 'total_time_ms' => 44.44
52
63
  }
53
64
 
54
65
  assert_equal expected, record
@@ -56,9 +67,22 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
56
67
 
57
68
  test 'nil query' do
58
69
  d = create_driver
59
- record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
70
+ row = {
71
+ 'queryid' => '1234',
72
+ 'query' => nil,
73
+ 'calls' => nil,
74
+ 'rows' => nil,
75
+ 'total_time' => nil
76
+ }
77
+ record = d.instance.record_for_row(row)
60
78
 
61
- expected = { 'query_length' => nil, 'queryid' => '1234' }
79
+ expected = {
80
+ 'query_length' => nil,
81
+ 'queryid' => '1234',
82
+ 'calls' => nil,
83
+ 'rows' => nil,
84
+ 'total_time_ms' => nil
85
+ }
62
86
  assert_equal expected, record
63
87
  end
64
88
 
@@ -75,26 +99,48 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
75
99
  )
76
100
  SQL
77
101
 
78
- record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
102
+ row = {
103
+ 'queryid' => 1234,
104
+ 'query' => ddl_sql,
105
+ 'calls' => 22,
106
+ 'rows' => 333,
107
+ 'total_time' => 44.44
108
+ }
109
+
110
+ record = d.instance.record_for_row(row)
79
111
 
80
112
  expected = {
81
113
  'fingerprint' => 'fa9c9d26757c4f9b',
82
114
  'query' => ddl_sql,
83
115
  'query_length' => 287,
84
- 'queryid' => '1234'
116
+ 'queryid' => '1234',
117
+ 'calls' => 22,
118
+ 'rows' => 333,
119
+ 'total_time_ms' => 44.44
85
120
  }
86
121
  assert_equal expected, record
87
122
  end
88
123
 
89
124
  test 'set command' do
90
125
  d = create_driver
91
- record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
126
+ row = {
127
+ 'queryid' => 1234,
128
+ 'query' => "SET TIME ZONE 'PST8PDT'",
129
+ 'calls' => 22,
130
+ 'rows' => 333,
131
+ 'total_time' => 44.44
132
+ }
133
+
134
+ record = d.instance.record_for_row(row)
92
135
 
93
136
  expected = {
94
137
  'fingerprint' => '23f8d6eb1d3125c3',
95
138
  'query' => 'SET TIME ZONE $1',
96
139
  'query_length' => 23,
97
- 'queryid' => '1234'
140
+ 'queryid' => '1234',
141
+ 'calls' => 22,
142
+ 'rows' => 333,
143
+ 'total_time_ms' => 44.44
98
144
  }
99
145
 
100
146
  assert_equal expected, record
@@ -102,9 +148,17 @@ class PgStatStatementsInputTest < Test::Unit::TestCase
102
148
 
103
149
  test 'unparseable sql' do
104
150
  d = create_driver
105
- record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
151
+ record = d.instance.record_for_row({ 'queryid' => 1234, 'query' => 'SELECT * FROM' })
152
+
153
+ expected = {
154
+ 'query_length' => 13,
155
+ 'query_unparseable' => true,
156
+ 'queryid' => '1234',
157
+ 'calls' => nil,
158
+ 'rows' => nil,
159
+ 'total_time_ms' => nil
160
+ }
106
161
 
107
- expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
108
162
  assert_equal expected, record
109
163
  end
110
164
  end
@@ -56,6 +56,14 @@ class Marginalia < Test::Unit::TestCase
56
56
  test_parse(sql, {}, 'sql', true, expected)
57
57
  end
58
58
 
59
+ test 'normal comment appended with trailing semicolon' do
60
+ sql = 'SELECT COUNT(*) FROM "projects" /* this is just a comment */ ; '
61
+ expected = {
62
+ "sql" => 'SELECT COUNT(*) FROM "projects"'
63
+ }
64
+ test_parse(sql, {}, 'sql', true, expected)
65
+ end
66
+
59
67
  test 'marginalia prepended for sidekiq' do
60
68
  sql = '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"'
61
69
  expected = {
@@ -5,7 +5,8 @@
5
5
 
6
6
  cleanup() {
7
7
  echo "# removing all logs"
8
- rm -rf /var/log/pg/*
8
+ find /var/log/pg/ -name "pg_stat_statements.*.log" -delete
9
+ find /var/log/pg/ -name "pg_stat_activity.*.log" -delete
9
10
  }
10
11
 
11
12
  die() {
@@ -17,6 +18,15 @@ die() {
17
18
  cleanup
18
19
  echo "# sleeping 10, awaiting logs"
19
20
  sleep 10;
20
- find /var/log/pg/ -name "pg_stat_statements.*.log" || die "No pg_stat_statements files created"
21
+
22
+ echo "# looking for pg_stat_statements"
23
+
24
+ (find /var/log/pg/ -name "pg_stat_statements.*.log" | grep . >/dev/null) || die "No pg_stat_statements files created"
25
+ cat /var/log/pg/pg_stat_statements.*.log | tail -10
26
+
27
+ echo "# looking for pg_stat_activity"
28
+
29
+ (find /var/log/pg/ -name "pg_stat_activity.*.log" | grep . >/dev/null) || die "No pg_stat_activity files created"
30
+ cat /var/log/pg/pg_stat_activity.*.log | tail -10
21
31
 
22
32
  cleanup
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-23 00:00:00.000000000 Z
11
+ date: 2021-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -50,14 +50,14 @@ dependencies:
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '2.1'
53
+ version: '2.0'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '2.1'
60
+ version: '2.0'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: rake
63
63
  requirement: !ruby/object:Gem::Requirement
@@ -125,7 +125,7 @@ files:
125
125
  homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
126
126
  licenses: []
127
127
  metadata: {}
128
- post_install_message:
128
+ post_install_message:
129
129
  rdoc_options: []
130
130
  require_paths:
131
131
  - lib
@@ -140,8 +140,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
140
  - !ruby/object:Gem::Version
141
141
  version: '0'
142
142
  requirements: []
143
- rubygems_version: 3.1.4
144
- signing_key:
143
+ rubygems_version: 3.2.28
144
+ signing_key:
145
145
  specification_version: 4
146
146
  summary: fluentd plugins to work with PostgreSQL CSV logs
147
147
  test_files: