fluent-plugin-postgresql-csvlog 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c99895582e6133a97c194d676882f42d0fad0ab52aff6cfe1b3fbbbcf32263a
4
- data.tar.gz: 71135c5dddefbf8a61f8f6e3453daddd61df187b082201fb00dff4680ce0bdbe
3
+ metadata.gz: bb322f50f848a7b196962dae916a75f30a77f1461167bb314494ae203f8ef9d6
4
+ data.tar.gz: 077266e6c7f3589813d0c4db8ddccfde76f7d6ecd0b6d6388b67a4e6328ad858
5
5
  SHA512:
6
- metadata.gz: 518a30b929f02f82a6a6d80d03a4a1a9fa66174759c8f9b89a3243a7bf4d8168a43fe08631882630790afea4fb24eed4179215f85d7454c8fc63fc6e2aae1c6e
7
- data.tar.gz: 43921a20185c91042274d0acbbd4c70ef94cb7c199675833890ab2cc946aa49065b8c0a624eb6691684e55bfa0945a02e9996f305211657ffae492e856deebe2
6
+ metadata.gz: 9b130aa84b1285a62466b3bf68ff6d02acc7e95cd53cc91ee7e34ec81ee4440e06358738790f86f8d436cb8a72ad232f12ac2c0f9d8aca16fd5c6ba19bbdfe49
7
+ data.tar.gz: e9480110be0cadfad86c5fcc554eb5d1d7264559714e4fd88ea396ae9bb913027b15782c70cd2f4c06e1f4cc5a5bbafe55632ca8eba02ce57e610d98f8440c09
data/.gitlab-ci.yml CHANGED
@@ -9,3 +9,21 @@ test:
9
9
  cache:
10
10
  paths:
11
11
  - vendor/ruby
12
+
13
+ # integration tests
14
+ itest:
15
+ services:
16
+ - name: postgres:12
17
+ alias: postgres
18
+ command: ["postgres", "-c", "shared_preload_libraries=pg_stat_statements", "-c", "pg_stat_statements.track=all"]
19
+ variables:
20
+ POSTGRES_USER: testuser
21
+ POSTGRES_PASSWORD: testpass
22
+ before_script:
23
+ - bundle config set path vendor
24
+ - bundle install --jobs $(nproc)
25
+ script:
26
+ - bundle exec rake itest
27
+ cache:
28
+ paths:
29
+ - vendor/ruby
data/Rakefile CHANGED
@@ -9,4 +9,10 @@ Rake::TestTask.new(:test) do |test|
9
9
  test.verbose = true
10
10
  end
11
11
 
12
+ Rake::TestTask.new(:itest) do |test|
13
+ test.libs << 'lib' << 'test'
14
+ test.test_files = FileList['test/**/itest_*.rb']
15
+ test.verbose = true
16
+ end
17
+
12
18
  task :default => [:build]
@@ -2,10 +2,10 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.2.1'
5
+ s.version = '0.3.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
- s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog'
8
+ s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
9
9
  s.summary = 'fluentd plugins to work with PostgreSQL CSV logs'
10
10
  s.description = 'fluentd plugins to work with PostgreSQL CSV logs'
11
11
 
@@ -15,6 +15,7 @@ Gem::Specification.new do |s|
15
15
  s.require_paths = ['lib']
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
+ s.add_dependency 'pg', '~> 1.1'
18
19
  s.add_dependency 'pg_query', '~> 2.0'
19
20
 
20
21
  s.add_development_dependency 'rake'
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent
8
+ # PgStatStatementsInput will periodically poll postgres, querying pg_stat_statements
9
+ # for queryid to query mappings. These are then normalized for security purposes,
10
+ # fingerprinted and emitted as records with the following format:
11
+ # {
12
+ # 'fingerprint' => '8a6e9896bd9048a2',
13
+ # 'query' => 'SELECT * FROM table ORDER BY queryid LIMIT $1',
14
+ # 'query_length' => 58,
15
+ # 'queryid' => 3239318621761098074
16
+ # }
17
+ class PgStatStatementsInput < Input
18
+ Fluent::Plugin.register_input('pg_stat_statements', self)
19
+
20
+ desc 'PostgreSQL host'
21
+ config_param :host, :string
22
+
23
+ desc 'RDBMS port (default: 5432)'
24
+ config_param :port, :integer, default: 5432
25
+
26
+ desc 'login user name'
27
+ config_param :username, :string, default: nil
28
+
29
+ desc 'postgres db'
30
+ config_param :dbname, :string, default: nil
31
+
32
+ desc 'login password'
33
+ config_param :password, :string, default: nil, secret: true
34
+
35
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
+ # for options
37
+ desc 'postgres sslmode'
38
+ config_param :sslmode, :string, default: 'prefer'
39
+
40
+ desc 'tag'
41
+ config_param :tag, :string, default: nil
42
+
43
+ desc 'interval in second to run query'
44
+ config_param :interval, :time, default: 300
45
+
46
+ desc 'Name of field to store SQL query fingerprint'
47
+ config_param :fingerprint_key, :string, default: 'fingerprint'
48
+
49
+ def start
50
+ @stop_flag = false
51
+ @thread = Thread.new(&method(:thread_main))
52
+ end
53
+
54
+ def shutdown
55
+ @stop_flag = true
56
+
57
+ # Interrupt thread and wait for it to finish
58
+ Thread.new { @thread.run } if @thread
59
+ @thread.join
60
+ end
61
+
62
+ def thread_main
63
+ until @stop_flag
64
+ sleep @interval
65
+ break if @stop_flag
66
+
67
+ begin
68
+ with_connection do |conn|
69
+ emit_statements_to_stream(conn)
70
+ end
71
+ rescue StandardError => e
72
+ log.error 'unexpected error', error: e.message, error_class: e.class
73
+ log.error_backtrace e.backtrace
74
+ end
75
+ end
76
+ end
77
+
78
+ # Returns a fluentd record for a query row
79
+ def record_for_row(row)
80
+ query = row['query']
81
+
82
+ # We record the query_length as it will help in understanding whether unparseable
83
+ # queries are truncated.
84
+ record = { 'queryid' => row['queryid'], 'query_length' => query&.length }
85
+
86
+ return record unless query
87
+
88
+ normalized = PgQuery.normalize(query)
89
+ record['query'] = normalized
90
+
91
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
92
+
93
+ record
94
+ rescue PgQuery::ParseError
95
+ record['query_unparseable'] = true
96
+
97
+ record
98
+ end
99
+
100
+ private
101
+
102
+ # Query the database and emit statements to fluentd router
103
+ def emit_statements_to_stream(conn)
104
+ me = MultiEventStream.new
105
+
106
+ now = Engine.now
107
+ conn.exec('SELECT queryid, query FROM pg_stat_statements').each do |row|
108
+ record = record_for_row(row)
109
+ me.add(now, record)
110
+ end
111
+
112
+ @router.emit_stream(@tag, me)
113
+ end
114
+
115
+ # Since this query is very infrequent, and it may be communicating directly
116
+ # with postgres without pgbouncer, don't use a persistent connection and
117
+ # ensure that it is properly closed
118
+ def with_connection(&block)
119
+ conn = PG.connect(
120
+ host: @host,
121
+ dbname: @dbname,
122
+ sslmode: @sslmode,
123
+ user: @username,
124
+ password: @password
125
+ )
126
+ conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
+
128
+ begin
129
+ block.call(conn)
130
+ ensure
131
+ # Always close the connection
132
+ conn.finish
133
+ end
134
+ end
135
+ end
136
+ end
data/test/helper.rb CHANGED
@@ -7,10 +7,13 @@ $LOAD_PATH.unshift(File.join(__dir__, '..', 'lib'))
7
7
  $LOAD_PATH.unshift(__dir__)
8
8
  require 'fluent/test'
9
9
  require 'fluent/test/driver/filter'
10
+ require 'fluent/test/driver/input'
10
11
  require 'fluent/test/helpers'
11
12
 
12
13
  Test::Unit::TestCase.include(Fluent::Test::Helpers)
14
+ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
13
15
 
14
16
  require 'fluent/plugin/filter_postgresql_slowlog'
15
17
  require 'fluent/plugin/filter_postgresql_redactor'
16
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/input_pg_stat_statements'
@@ -0,0 +1,119 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputIntegrationTest < Test::Unit::TestCase
6
+ # These items are configured in .gitlab-ci.yml on the postgres service
7
+ HOST = 'postgres'
8
+ USERNAME = 'testuser'
9
+ PASSWORD = 'testpass'
10
+
11
+ def setup
12
+ Fluent::Test.setup
13
+
14
+ @conn = PG.connect(
15
+ host: HOST,
16
+ user: USERNAME,
17
+ password: PASSWORD
18
+ )
19
+
20
+ try_setup_extension
21
+ create_known_statement
22
+ end
23
+
24
+ def teardown
25
+ @conn&.finish
26
+ end
27
+
28
+ # Setup pg_stat_statements extension
29
+ def try_setup_extension
30
+ @conn.exec('CREATE EXTENSION pg_stat_statements')
31
+ rescue PG::DuplicateObject
32
+ end
33
+
34
+ # This statement gives us something to look for in the emitted stream
35
+ def create_known_statement
36
+ @conn.exec('SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT 1')
37
+ end
38
+
39
+ VALID_CONFIG = %(
40
+ tag postgres.pg_stat_statements
41
+ host #{HOST}
42
+ username #{USERNAME}
43
+ password #{PASSWORD}
44
+ interval 1
45
+ )
46
+
47
+ INVALID_CONFIG = %(
48
+ host 'invalid_host.dne'
49
+ port 1234
50
+ username #{USERNAME}
51
+ password #{PASSWORD}
52
+ interval 1
53
+ )
54
+
55
+ def create_driver(config)
56
+ Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(config)
57
+ end
58
+
59
+ sub_test_case 'configuration' do
60
+ test 'connects' do
61
+ d = create_driver(VALID_CONFIG)
62
+
63
+ emits = []
64
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
65
+ d.run(num_waits = 50) do
66
+ emits = d.emits
67
+ end
68
+
69
+ assert_false emits.empty?
70
+ end
71
+
72
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
73
+ # fluentd configuration to fail. We would rather retry until we get a connection.
74
+ test 'connects for an invalid config' do
75
+ d = create_driver(INVALID_CONFIG)
76
+
77
+ emits = []
78
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
79
+ d.run(num_waits = 50) do
80
+ emits = d.emits
81
+ end
82
+
83
+ assert_true emits.empty?
84
+ end
85
+ end
86
+
87
+ sub_test_case 'execution' do
88
+ test 'connects' do
89
+ d = create_driver(VALID_CONFIG)
90
+
91
+ emits = []
92
+ # wait 50 * 0.05 seconds; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
93
+ d.run(num_waits = 50) do
94
+ emits = d.emits
95
+ end
96
+
97
+ expected_record = {
98
+ 'fingerprint' => '8a6e9896bd9048a2',
99
+ 'query' => 'SELECT * FROM pg_stat_statements ORDER BY queryid LIMIT $1',
100
+ 'query_length' => 58,
101
+ 'queryid' => 3_239_318_621_761_098_074
102
+ }
103
+ known_statement_event = emits.find do |event|
104
+ record = event[2]
105
+ record['query'] == expected_record['query']
106
+ end
107
+
108
+ assert_false known_statement_event.nil?
109
+
110
+ tag = known_statement_event[0]
111
+ record = known_statement_event[2]
112
+
113
+ assert_equal 'postgres.pg_stat_statements', tag
114
+ assert_equal expected_record['fingerprint'], record['fingerprint']
115
+ assert_equal expected_record['query_length'], record['query_length']
116
+ assert_true expected_record.include? 'queryid'
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatStatementsInputTest < Test::Unit::TestCase
6
+ def setup
7
+ Fluent::Test.setup
8
+ end
9
+
10
+ CONFIG = %(
11
+ tag postgres.pg_stat_statements
12
+ host localhost
13
+ port 1234
14
+ dbname gitlab
15
+ sslmode require
16
+ username moo
17
+ password secret
18
+ interval 600
19
+ fingerprint_key fingerprint
20
+ )
21
+
22
+ def create_driver
23
+ Fluent::Test::InputTestDriver.new(Fluent::PgStatStatementsInput).configure(CONFIG)
24
+ end
25
+
26
+ sub_test_case 'configuration' do
27
+ test 'basic configuration' do
28
+ d = create_driver
29
+
30
+ assert_equal 'postgres.pg_stat_statements', d.instance.tag
31
+ assert_equal 'localhost', d.instance.host
32
+ assert_equal 1234, d.instance.port
33
+ assert_equal 'gitlab', d.instance.dbname
34
+ assert_equal 'require', d.instance.sslmode
35
+ assert_equal 'moo', d.instance.username
36
+ assert_equal 'secret', d.instance.password
37
+ assert_equal 600, d.instance.interval
38
+ assert_equal 'fingerprint', d.instance.fingerprint_key
39
+ end
40
+ end
41
+
42
+ sub_test_case 'execution' do
43
+ test 'sql' do
44
+ d = create_driver
45
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM users WHERE user_id = ?' })
46
+
47
+ expected = {
48
+ 'fingerprint' => 'c071dee80d466e7d',
49
+ 'query' => 'SELECT * FROM users WHERE user_id = ?',
50
+ 'query_length' => 37,
51
+ 'queryid' => '1234'
52
+ }
53
+
54
+ assert_equal expected, record
55
+ end
56
+
57
+ test 'nil query' do
58
+ d = create_driver
59
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => nil })
60
+
61
+ expected = { 'query_length' => nil, 'queryid' => '1234' }
62
+ assert_equal expected, record
63
+ end
64
+
65
+ test 'ddl query' do
66
+ d = create_driver
67
+ ddl_sql = <<-SQL
68
+ CREATE TABLE accounts (
69
+ user_id serial PRIMARY KEY,
70
+ username VARCHAR(50) UNIQUE NOT NULL,
71
+ password VARCHAR(50) NOT NULL,
72
+ email VARCHAR(255) UNIQUE NOT NULL,
73
+ created_on TIMESTAMP NOT NULL,
74
+ last_login TIMESTAMP
75
+ )
76
+ SQL
77
+
78
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => ddl_sql })
79
+
80
+ expected = {
81
+ 'fingerprint' => 'fa9c9d26757c4f9b',
82
+ 'query' => ddl_sql,
83
+ 'query_length' => 287,
84
+ 'queryid' => '1234'
85
+ }
86
+ assert_equal expected, record
87
+ end
88
+
89
+ test 'set command' do
90
+ d = create_driver
91
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => "SET TIME ZONE 'PST8PDT'" })
92
+
93
+ expected = {
94
+ 'fingerprint' => '23f8d6eb1d3125c3',
95
+ 'query' => 'SET TIME ZONE $1',
96
+ 'query_length' => 23,
97
+ 'queryid' => '1234'
98
+ }
99
+
100
+ assert_equal expected, record
101
+ end
102
+
103
+ test 'unparseable sql' do
104
+ d = create_driver
105
+ record = d.instance.record_for_row({ 'queryid' => '1234', 'query' => 'SELECT * FROM' })
106
+
107
+ expected = { 'query_length' => 13, 'query_unparseable' => true, 'queryid' => '1234' }
108
+ assert_equal expected, record
109
+ end
110
+ end
111
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-23 00:00:00.000000000 Z
11
+ date: 2021-06-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -30,6 +30,20 @@ dependencies:
30
30
  - - "<"
31
31
  - !ruby/object:Gem::Version
32
32
  version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: pg
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '1.1'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '1.1'
33
47
  - !ruby/object:Gem::Dependency
34
48
  name: pg_query
35
49
  requirement: !ruby/object:Gem::Requirement
@@ -88,12 +102,15 @@ files:
88
102
  - lib/fluent/plugin/filter_marginalia.rb
89
103
  - lib/fluent/plugin/filter_postgresql_redactor.rb
90
104
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
105
+ - lib/fluent/plugin/input_pg_stat_statements.rb
91
106
  - lib/fluent/plugin/parser_multiline_csv.rb
92
107
  - test/helper.rb
108
+ - test/plugin/itest_input_pg_stat_statements.rb
93
109
  - test/plugin/test_filter_marginalia.rb
94
110
  - test/plugin/test_filter_postgresql_redactor.rb
95
111
  - test/plugin/test_filter_postgresql_slowlog.rb
96
- homepage: https://gitlab.com/gitlab-org/fluent-plugin-postgresql-csvlog
112
+ - test/plugin/test_input_pg_stat_statements.rb
113
+ homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
97
114
  licenses: []
98
115
  metadata: {}
99
116
  post_install_message:
@@ -117,6 +134,8 @@ specification_version: 4
117
134
  summary: fluentd plugins to work with PostgreSQL CSV logs
118
135
  test_files:
119
136
  - test/helper.rb
137
+ - test/plugin/itest_input_pg_stat_statements.rb
120
138
  - test/plugin/test_filter_marginalia.rb
121
139
  - test/plugin/test_filter_postgresql_redactor.rb
122
140
  - test/plugin/test_filter_postgresql_slowlog.rb
141
+ - test/plugin/test_input_pg_stat_statements.rb