fluent-plugin-postgresql-csvlog 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c99895582e6133a97c194d676882f42d0fad0ab52aff6cfe1b3fbbbcf32263a
4
- data.tar.gz: 71135c5dddefbf8a61f8f6e3453daddd61df187b082201fb00dff4680ce0bdbe
3
+ metadata.gz: bb322f50f848a7b196962dae916a75f30a77f1461167bb314494ae203f8ef9d6
4
+ data.tar.gz: 077266e6c7f3589813d0c4db8ddccfde76f7d6ecd0b6d6388b67a4e6328ad858
5
5
  SHA512:
6
- metadata.gz: 518a30b929f02f82a6a6d80d03a4a1a9fa66174759c8f9b89a3243a7bf4d8168a43fe08631882630790afea4fb24eed4179215f85d7454c8fc63fc6e2aae1c6e
7
- data.tar.gz: 43921a20185c91042274d0acbbd4c70ef94cb7c199675833890ab2cc946aa49065b8c0a624eb6691684e55bfa0945a02e9996f305211657ffae492e856deebe2
6
+ metadata.gz: 9b130aa84b1285a62466b3bf68ff6d02acc7e95cd53cc91ee7e34ec81ee4440e06358738790f86f8d436cb8a72ad232f12ac2c0f9d8aca16fd5c6ba19bbdfe49
7
+ data.tar.gz: e9480110be0cadfad86c5fcc554eb5d1d7264559714e4fd88ea396ae9bb913027b15782c70cd2f4c06e1f4cc5a5bbafe55632ca8eba02ce57e610d98f8440c09
data/.gitlab-ci.yml CHANGED
@@ -9,3 +9,21 @@ test:
9
9
  cache:
10
10
  paths:
11
11
  - vendor/ruby
12
+
13
+ # integration tests
14
+ itest:
15
+ services:
16
+ - name: postgres:12
17
+ alias: postgres
18
+ command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
19
+ variables:
20
+ POSTGRES_USER: testuser
21
+ POSTGRES_PASSWORD: testpass
22
+ before_script:
23
+ - bundle config set path vendor
24
+ - bundle install --jobs $(nproc)
25
+ script:
26
+ - bundle exec rake itest
27
+ cache:
28
+ paths:
29
+ - vendor/ruby
data/Rakefile CHANGED
@@ -9,4 +9,10 @@ Rake::TestTask.new(:test) do |test|
9
9
  test.verbose = true
10
10
  end
11
11
 
12
+ Rake::TestTask.new(:itest) do |test|
13
+ test.libs << 'lib' << 'test'
14
+ test.test_files = FileList['test/**/itest_*.rb']
15
+ test.verbose = true
16
+ end
17
+
12
18
  task :default => [:build]
@@ -2,10 +2,10 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.2.1'
5
+ s.version = '0.3.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
- s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
8
+ s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
9
9
  s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
10
  s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
11
 
@@ -15,6 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
+ s.add_dependency 'pg', '~> 1.1'
18
19
  s.add_dependency 'pg_query', '~> 2.0'
19
20
 
20
21
  s.add_development_dependency 'rake'
data/lib/fluent/plugin/input_pg_stat_statements.rb ADDED
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent
8
+ # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
9
+ # for queryid-to-query mappings. These are then normalized for security purposes,
10
+ # fingerprinted and emitted as records with the following format:
11
+ # {
12
+ # 'fingerprint' => '8a6e9896bd9048a2',
13
+ # 'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
14
+ # 'query_length' => 58,
15
+ # 'queryid' => 3239318621761098074
16
+ # }
17
+ class PgStatStatementsInput < Input
18
+ Fluent::Plugin.register_input('pg_stat_statements', self)
19
+
20
+ desc 'PostgreSQL host'
21
+ config_param :host, :string
22
+
23
+ desc 'RDBMS port (default: 5432)'
24
+ config_param :port, :integer, default: 5432
25
+
26
+ desc 'login user name'
27
+ config_param :username, :string, default: nil
28
+
29
+ desc 'postgres db'
30
+ config_param :dbname, :string, default: nil
31
+
32
+ desc 'login password'
33
+ config_param :password, :string, default: nil, secret: true
34
+
35
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
+ # for options
37
+ desc 'postgres sslmode'
38
+ config_param :sslmode, :string, default: 'prefer'
39
+
40
+ desc 'tag'
41
+ config_param :tag, :string, default: nil
42
+
43
+ desc 'interval in second to run query'
44
+ config_param :interval, :time, default: 300
45
+
46
+ desc 'Name of field to store SQL query fingerprint'
47
+ config_param :fingerprint_key, :string, default: 'fingerprint'
48
+
49
+ def start
50
+ @stop_flag = false
51
+ @thread = Thread.new(&method(:thread_main))
52
+ end
53
+
54
+ def shutdown
55
+ @stop_flag = true
56
+
57
+ # Interrupt thread and wait for it to finish
58
+ Thread.new { @thread.run } if @thread
59
+ @thread.join
60
+ end
61
+
62
+ def thread_main
63
+ until @stop_flag
64
+ sleep @interval
65
+ break if @stop_flag
66
+
67
+ begin
68
+ with_connection do |conn|
69
+ emit_statements_to_stream(conn)
70
+ end
71
+ rescue StandardError => e
72
+ log.error 'unexpected error', error: e.message, error_class: e.class
73
+ log.error_backtrace e.backtrace
74
+ end
75
+ end
76
+ end
77
+
78
+ # Returns a fluentd record for a query row
79
+ def record_for_row(row)
80
+ query = row['query']
81
+
82
+ # We record the query_length as it will help in understanding whether unparseable
83
+ # queries are truncated.
84
+ record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
85
+
86
+ return record unless query
87
+
88
+ normalized = PgQuery.normalize(query)
89
+ record['query'] = normalized
90
+
91
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
92
+
93
+ record
94
+ rescue PgQuery::ParseError
95
+ record['query_unparseable'] = true
96
+
97
+ record
98
+ end
99
+
100
+ private
101
+
102
+ # Query the database and emit statements to fluentd router
103
+ def emit_statements_to_stream(conn)
104
+ me = MultiEventStream.new
105
+
106
+ now = Engine.now
107
+ conn.exec('SELECT queryid, query FROM pg_stat_statements').each do |row|
108
+ record = record_for_row(row)
109
+ me.add(now, record)
110
+ end
111
+
112
+ @router.emit_stream(@tag, me)
113
+ end
114
+
115
+ # Since this query is very infrequent, and it may be communicating directly
116
+ # with postgres without pgbouncer, don't use a persistent connection and
117
+ # ensure that it is properly closed
118
+ def with_connection(&block)
119
+ conn = PG.connect(
120
+ host: @host,
121
+ dbname: @dbname,
122
+ sslmode: @sslmode,
123
+ user: @username,
124
+ password: @password
125
+ )
126
+ conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
+
128
+ begin
129
+ block.call(conn)
130
+ ensure
131
+ # Always close the connection
132
+ conn.finish
133
+ end
134
+ end
135
+ end
136
+ end
data/test/helper.rb CHANGED
@@ -7,10 +7,13 @@ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
7
  $LOAD_PATH.unshift(__dir__)
8
8
  require 'fluent/test'
9
9
  require 'fluent/test/driver/filter'
10
+ require 'fluent/test/driver/input'
10
11
  require 'fluent/test/helpers'
11
12
 
12
13
  Test::Unit::TestCase.include(Fluent::Test::Helpers)
14
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
13
15
 
14
16
  require 'fluent/plugin/filter_postgresql_slowlog'
15
17
  require 'fluent/plugin/filter_postgresql_redactor'
16
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/input_pg_stat_statements'
data/test/plugin/itest_input_pg_stat_statements.rb ADDED
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
6
+ # These items are configured in .gitlab-ci.yml on the postgres service
7
+ HOST = 'postgres'
8
+ USERNAME = 'testuser'
9
+ PASSWORD = 'testpass'
10
+
11
+ def setup
12
+ Fluent::Test.setup
13
+
14
+ @conn = PG.connect(
15
+ host: HOST,
16
+ user: USERNAME,
17
+ password: PASSWORD
18
+ )
19
+
20
+ try_setup_extension
21
+ create_known_statement
22
+ end
23
+
24
+ def teardown
25
+ @conn&.finish
26
+ end
27
+
28
+ # Setup pg_stat_statements extension
29
+ def try_setup_extension
30
+ @conn.exec('CREATE EXTENSION pg_stat_statements')
31
+ rescue PG::DuplicateObject
32
+ end
33
+
34
+ # This statement gives us something to look for in the emitted stream
35
+ def create_known_statement
36
+ @conn.exec('SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT 1')
37
+ end
38
+
39
+ VALID_CONFIG = %(
40
+ tag postgres.pg_stat_statements
41
+ host #{HOST}
42
+ username #{USERNAME}
43
+ password #{PASSWORD}
44
+ interval 1
45
+ )
46
+
47
+ INVALID_CONFIG = %(
48
+ host 'invalid_host.dne'
49
+ port 1234
50
+ username #{USERNAME}
51
+ password #{PASSWORD}
52
+ interval 1
53
+ )
54
+
55
+ def create_driver(config)
56
+ Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(config)
57
+ end
58
+
59
+ sub_test_case 'configuration' do
60
+ test 'connects' do
61
+ d = create_driver(VALID_CONFIG)
62
+
63
+ emits = []
64
+ # wait 50 * 0.05 s (2.5 s in total); see fluentd/lib/fluent/test/base.rb:79 (num_waits.times { sleep 0.05 })
65
+ d.run(num_waits = 50) do
66
+ emits = d.emits
67
+ end
68
+
69
+ assert_false emits.empty?
70
+ end
71
+
72
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
73
+ # fluentd configuration to fail. We would rather retry until we get a connection.
74
+ test 'connects for an invalid config' do
75
+ d = create_driver(INVALID_CONFIG)
76
+
77
+ emits = []
78
+ # wait 50 * 0.05 s (2.5 s in total); see fluentd/lib/fluent/test/base.rb:79 (num_waits.times { sleep 0.05 })
79
+ d.run(num_waits = 50) do
80
+ emits = d.emits
81
+ end
82
+
83
+ assert_true emits.empty?
84
+ end
85
+ end
86
+
87
+ sub_test_case 'execution' do
88
+ test 'connects' do
89
+ d = create_driver(VALID_CONFIG)
90
+
91
+ emits = []
92
+ # wait 50 * 0.05 s (2.5 s in total); see fluentd/lib/fluent/test/base.rb:79 (num_waits.times { sleep 0.05 })
93
+ d.run(num_waits = 50) do
94
+ emits = d.emits
95
+ end
96
+
97
+ expected_record = {
98
+ 'fingerprint' => '8a6e9896bd9048a2',
99
+ 'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
100
+ 'query_length' => 58,
101
+ 'queryid' => 3_239_318_621_761_098_074
102
+ }
103
+ known_statement_event = emits.find do |event|
104
+ record = event[2]
105
+ record['query'] == expected_record['query']
106
+ end
107
+
108
+ assert_false known_statement_event.nil?
109
+
110
+ tag = known_statement_event[0]
111
+ record = known_statement_event[2]
112
+
113
+ assert_equal 'postgres.pg_stat_statements', tag
114
+ assert_equal expected_record['fingerprint'], record['fingerprint']
115
+ assert_equal expected_record['query_length'], record['query_length']
116
+ assert_true expected_record.include? 'queryid'
117
+ end
118
+ end
119
+ end
data/test/plugin/test_input_pg_stat_statements.rb ADDED
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ end
9
+
10
+ CONFIG = %(
11
+ tag postgres.pg_stat_statements
12
+ host localhost
13
+ port 1234
14
+ dbname gitlab
15
+ sslmode require
16
+ username moo
17
+ password secret
18
+ interval 600
19
+ fingerprint_key fingerprint
20
+ )
21
+
22
+ def create_driver
23
+ Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(CONFIG)
24
+ end
25
+
26
+ sub_test_case 'configuration' do
27
+ test 'basic configuration' do
28
+ d = create_driver
29
+
30
+ assert_equal 'postgres.pg_stat_statements', d.instance.tag
31
+ assert_equal 'localhost', d.instance.host
32
+ assert_equal 1234, d.instance.port
33
+ assert_equal 'gitlab', d.instance.dbname
34
+ assert_equal 'require', d.instance.sslmode
35
+ assert_equal 'moo', d.instance.username
36
+ assert_equal 'secret', d.instance.password
37
+ assert_equal 600, d.instance.interval
38
+ assert_equal 'fingerprint', d.instance.fingerprint_key
39
+ end
40
+ end
41
+
42
+ sub_test_case 'execution' do
43
+ test 'sql' do
44
+ d = create_driver
45
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
46
+
47
+ expected = {
48
+ 'fingerprint' => 'c071dee80d466e7d',
49
+ 'query' => 'SELECT * FROM users WHERE user_id = ?',
50
+ 'query_length' => 37,
51
+ 'queryid' => '1234'
52
+ }
53
+
54
+ assert_equal expected, record
55
+ end
56
+
57
+ test 'nil query' do
58
+ d = create_driver
59
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
60
+
61
+ expected = { 'query_length' => nil, 'queryid' => '1234' }
62
+ assert_equal expected, record
63
+ end
64
+
65
+ test 'ddl query' do
66
+ d = create_driver
67
+ ddl_sql = <<-SQL
68
+ CREATE TABLE accounts (
69
+ user_id serial PRIMARY KEY,
70
+ username VARCHAR(50) UNIQUE NOT NULL,
71
+ password VARCHAR(50) NOT NULL,
72
+ email VARCHAR(255) UNIQUE NOT NULL,
73
+ created_on TIMESTAMP NOT NULL,
74
+ last_login TIMESTAMP
75
+ )
76
+ SQL
77
+
78
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
79
+
80
+ expected = {
81
+ 'fingerprint' => 'fa9c9d26757c4f9b',
82
+ 'query' => ddl_sql,
83
+ 'query_length' => 287,
84
+ 'queryid' => '1234'
85
+ }
86
+ assert_equal expected, record
87
+ end
88
+
89
+ test 'set command' do
90
+ d = create_driver
91
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
92
+
93
+ expected = {
94
+ 'fingerprint' => '23f8d6eb1d3125c3',
95
+ 'query' => 'SET TIME ZONE $1',
96
+ 'query_length' => 23,
97
+ 'queryid' => '1234'
98
+ }
99
+
100
+ assert_equal expected, record
101
+ end
102
+
103
+ test 'unparseable sql' do
104
+ d = create_driver
105
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
106
+
107
+ expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
108
+ assert_equal expected, record
109
+ end
110
+ end
111
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-23 00:00:00.000000000 Z
11
+ date: 2021-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -30,6 +30,20 @@ dependencies:
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
33
47
  - !ruby/object:Gem::Dependency
34
48
  name: pg_query
35
49
  requirement: !ruby/object:Gem::Requirement
@@ -88,12 +102,15 @@ files:
88
102
  - lib/fluent/plugin/filter_marginalia.rb
89
103
  - lib/fluent/plugin/filter_postgresql_redactor.rb
90
104
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
105
+ - lib/fluent/plugin/input_pg_stat_statements.rb
91
106
  - lib/fluent/plugin/parser_multiline_csv.rb
92
107
  - test/helper.rb
108
+ - test/plugin/itest_input_pg_stat_statements.rb
93
109
  - test/plugin/test_filter_marginalia.rb
94
110
  - test/plugin/test_filter_postgresql_redactor.rb
95
111
  - test/plugin/test_filter_postgresql_slowlog.rb
96
- homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
112
+ - test/plugin/test_input_pg_stat_statements.rb
113
+ homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
97
114
  licenses: []
98
115
  metadata: {}
99
116
  post_install_message:
@@ -117,6 +134,8 @@ specification_version: 4
117
134
  summary: fluentd plugins to work with PostgreSQL CSV logs
118
135
  test_files:
119
136
  - test/helper.rb
137
+ - test/plugin/itest_input_pg_stat_statements.rb
120
138
  - test/plugin/test_filter_marginalia.rb
121
139
  - test/plugin/test_filter_postgresql_redactor.rb
122
140
  - test/plugin/test_filter_postgresql_slowlog.rb
141
+ - test/plugin/test_input_pg_stat_statements.rb