fluent-plugin-postgresql-csvlog 0.3.2 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d61fc31718e43c6d1dff46139a1b03d56384405d45b69c9772dc6f7b6a66dbf
4
- data.tar.gz: 5d2d23a4a7b5f277b19f181aa515ab74718f6ef93824e2c958264d9bbdf9c9aa
3
+ metadata.gz: e1cc2264997378ec801c6623768d57ca27ce1c28d6ac8ffcb6a905add2ce9f16
4
+ data.tar.gz: 79aab5d187209db5790f9bca181ea0ca8c909c808809590ad46cef0f6645902c
5
5
  SHA512:
6
- metadata.gz: f2106f60749b6fa8fc931ccd3d85f51595b0ec60eb9f425935072b1749ed068e8a068c6d513f1557a20e8c5a0613acad80c6690c7ce2aa20cca3027d686c388a
7
- data.tar.gz: 9ceef623cbd5256e047dea817d487d2711cc8b387ff068a10913100fe649e77174a600ac019b4729403390152c1a3e6f162accf4cc50f020561032b6493d965f
6
+ metadata.gz: e38e47908ac33706c4b6679fabc97a395bb63f8a110cb6a836c93f984dbfaf1f77f2204209cfe8a0609ddf969ab960cd855ce363398388084a0026e5df0c4970
7
+ data.tar.gz: 94505a6d7ceee368c22b0c32081b85aaf215d28217ceed1ff8f58bab840367f5afe19ed725056394e57fd1d6ebfe3ae98fba66b39eb781cfa2f1f80a602385c6
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/.gitlab-ci.yml CHANGED
@@ -27,3 +27,16 @@ itest:
27
27
  cache:
28
28
  paths:
29
29
  - vendor/ruby
30
+
31
+ end_to_end_verification_test:
32
+ image: docker:19.03.12
33
+ services:
34
+ - docker:19.03.12-dind
35
+ tags:
36
+ - gitlab-org-docker
37
+ variables:
38
+ DOCKER_TLS_CERTDIR: ""
39
+ before_script:
40
+ - apk add --no-cache docker-compose
41
+ script:
42
+ - docker-compose run --rm verifier
data/README.md CHANGED
@@ -7,6 +7,8 @@ parse PostgreSQL CSV log files and extract slow log information:
7
7
  - `PostgreSQLSlowLog`: Extracts slow log entries into `duration_s` and `statement` fields
8
8
  - `PostgreSQLRedactor`: Normalizes the SQL query and redacts sensitive information
9
9
  - `Marginalia`: Parses [Marginalia comments](https://github.com/basecamp/marginalia) into key-value pairs and stores them
10
+ - `PgStatStatementsInput`: polls the [`pg_stat_statements`](https://www.postgresql.org/docs/current/pgstatstatements.html) postgres plugin and emits fluentd events.
11
+ - `PgStatActivityInput`: polls the [Postgres activity monitor](https://www.postgresql.org/docs/current/monitoring-stats.html) (`pg_stat_activity`) and emits fluentd events.
10
12
 
11
13
  ## Installation
12
14
 
@@ -72,3 +74,17 @@ ingest and parse PostgreSQL CSV logs:
72
74
  </format>
73
75
  </match>
74
76
  ```
77
+
78
+ ## Developing `fluent-plugin-postgresql-csvlog`
79
+
80
+ To develop and debug locally, there is a `Dockerfile` and `docker-compose.yml` that will set up a local environment,
81
+ complete with Postgres, suitable for testing purposes.
82
+
83
+ 1. `docker compose run --rm verifier` - test the current configuration
84
+ 1. `docker compose up`
85
+
86
+ ### Releasing a new version
87
+
88
+ 1. Update the version in `fluent-plugin-postgresql-csvlog.gemspec`.
89
+ 1. Create a merge request and merge the changes to `master`.
90
+ 1. Run `bundle exec rake release`.
data/docker-compose.yml CHANGED
@@ -1,14 +1,9 @@
1
1
  # Docker Compose setup useful for testing and development purposes
2
- version: "3.9"
2
+ version: "3.3"
3
3
  services:
4
- fluentd:
5
- build: .
6
- links:
7
- - postgres
8
- entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
9
4
  postgres:
10
5
  image: postgres
11
- restart: always
6
+ restart: "no"
12
7
  environment:
13
8
  - POSTGRES_USER=testuser
14
9
  - POSTGRES_PASSWORD=testpass
@@ -17,3 +12,26 @@ services:
17
12
  command: postgres -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all
18
13
  volumes:
19
14
  - ./sql/create_extension.sql:/docker-entrypoint-initdb.d/create_extension.sql
15
+
16
+ fluentd:
17
+ build: .
18
+ restart: "no"
19
+ links:
20
+ - postgres
21
+ entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
22
+ volumes:
23
+ - ./example-fluentd.conf:/src/example-fluentd.conf
24
+ - log-volume:/var/log/pg/
25
+
26
+ verifier:
27
+ image: alpine:3.13
28
+ restart: "no"
29
+ links:
30
+ - fluentd
31
+ command: /bin/sh /src/verify-docker-compose.sh
32
+ volumes:
33
+ - ./test/verify-docker-compose.sh:/src/verify-docker-compose.sh
34
+ - log-volume:/var/log/pg/
35
+
36
+ volumes:
37
+ log-volume:
data/example-fluentd.conf CHANGED
@@ -8,5 +8,18 @@
8
8
  </source>
9
9
 
10
10
  <match postgres.pg_stat_statements>
11
- @type stdout
11
+ @type file
12
+ path /var/log/pg/pg_stat_statements
13
+ time_slice_format %Y%m%d%H%M%S
14
+ flush_interval 1s
15
+ utc
12
16
  </match>
17
+
18
+ <source>
19
+ @type pg_stat_activity
20
+ tag postgres.pg_stat_activity
21
+ host postgres
22
+ username testuser
23
+ password testpass
24
+ interval 1
25
+ </source>
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.3.2'
5
+ s.version = '0.4.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
18
  s.add_dependency 'pg', '~> 1.1'
19
- s.add_dependency 'pg_query', '~> 2.0'
19
+ s.add_dependency 'pg_query', '~> 2.1'
20
20
 
21
21
  s.add_development_dependency 'rake'
22
22
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'fluent/plugin/filter'
4
+ require_relative './marginalia_extractor'
4
5
 
5
6
  module Fluent
6
7
  module Plugin
@@ -11,6 +12,7 @@ module Fluent
11
12
  # /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"
12
13
  #
13
14
  class Marginalia < Filter
15
+ include MarginaliaExtractor
14
16
  Fluent::Plugin.register_filter('marginalia', self)
15
17
 
16
18
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
@@ -19,77 +21,11 @@ module Fluent
19
21
  desc 'Whether to strip the comment from the record specified by key'
20
22
  config_param :strip_comment, :bool, default: true
21
23
 
22
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
- MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
24
-
25
24
  def filter(_tag, _time, record)
26
- parse_comments(record)
25
+ parse_marginalia_into_record(record, @key, @strip_comment)
27
26
 
28
27
  record
29
28
  end
30
-
31
- private
32
-
33
- def parse_comments(record)
34
- sql = record[@key]
35
-
36
- return unless sql
37
-
38
- comment_match = match_marginalia_comment(sql)
39
-
40
- return unless comment_match
41
-
42
- entries = extract_entries(comment_match['comment'])
43
- parse_entries(entries, record)
44
-
45
- record[@key] = comment_match['sql'].strip if @strip_comment
46
- end
47
-
48
- def match_marginalia_comment(sql)
49
- matched = MARGINALIA_PREPENDED_REGEXP.match(sql)
50
-
51
- return matched if matched
52
-
53
- MARGINALIA_APPENDED_REGEXP.match(sql)
54
- end
55
-
56
- def extract_entries(comment)
57
- comment = scrub_comment(comment)
58
-
59
- return [] unless comment
60
-
61
- comment.split(',')
62
- end
63
-
64
- def scrub_comment(comment)
65
- return unless comment
66
-
67
- comment.strip!
68
- comment.gsub!(%r{^/\*}, '')
69
- comment.gsub!(%r{\*/$}, '')
70
- end
71
-
72
- def parse_entries(entries, record)
73
- entries.each do |component|
74
- data = component.split(':', 2)
75
-
76
- break unless data.length == 2
77
-
78
- stored_key = store_key(record, data[0])
79
- record[stored_key] = data[1]
80
- end
81
- end
82
-
83
- def store_key(record, component_key)
84
- # In case there is a conflict with the Marginalia key
85
- # (e.g. `correlation_id`), we use the base key
86
- # (`sql_correlation_id`) instead.
87
- if record.key?(component_key)
88
- "#{@key}_#{component_key}"
89
- else
90
- component_key
91
- end
92
- end
93
29
  end
94
30
  end
95
31
  end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './polling_pg_input_plugin'
4
+ require 'pg_query'
5
+ require_relative './marginalia_extractor'
6
+ require 'time'
7
+
8
+ module Fluent::Plugin
9
+ # PgStatActivityInput polls the `pg_stat_activity` table
10
+ # emitting normalized versions of the queries currently running on
11
+ # the postgres server.
12
+ # Fingerprints of the queries are also included for easier aggregation
13
+ class PgStatActivityInput < PollingPostgresInputPlugin
14
+ include MarginaliaExtractor
15
+ Fluent::Plugin.register_input('pg_stat_activity', self)
16
+
17
+ ACTIVITY_QUERY = <<-SQL
18
+ SELECT
19
+ datid,
20
+ datname,
21
+ pid,
22
+ usesysid,
23
+ usename,
24
+ application_name,
25
+ host(client_addr) as client_addr,
26
+ client_hostname,
27
+ client_port,
28
+ xact_start,
29
+ extract(epoch from clock_timestamp() - xact_start) xact_age_s,
30
+ query_start,
31
+ extract(epoch from clock_timestamp() - query_start) query_age_s,
32
+ state_change,
33
+ extract(epoch from clock_timestamp() - state_change) state_age_s,
34
+ state,
35
+ query
36
+ FROM pg_stat_activity
37
+ WHERE usename IS NOT NULL
38
+ SQL
39
+
40
+ desc 'Name of field to store SQL query fingerprint'
41
+ config_param :fingerprint_key, :string, default: 'fingerprint'
42
+
43
+ protected
44
+
45
+ def on_poll
46
+ with_connection do |conn|
47
+ emit_activity_to_stream(conn)
48
+ end
49
+ end
50
+
51
+ public
52
+
53
+ # Query the database and emit statements to fluentd router
54
+ def emit_activity_to_stream(conn)
55
+ me = Fluent::MultiEventStream.new
56
+
57
+ now = Fluent::Engine.now
58
+ conn.exec(ACTIVITY_QUERY).each do |row|
59
+ record = record_for_row(row)
60
+ me.add(now, record)
61
+ end
62
+
63
+ @router.emit_stream(@tag, me)
64
+ end
65
+
66
+ # Returns a fluentd record for a query row
67
+ def record_for_row(row)
68
+ record = {
69
+ 'datid' => row['datid'],
70
+ 'datname' => row['datname'],
71
+ 'pid' => row['pid'],
72
+ 'usesysid' => row['usesysid'],
73
+ 'usename' => row['usename'],
74
+ 'application_name' => row['application_name'],
75
+ 'client_addr' => row['client_addr'],
76
+ 'client_hostname' => row['client_hostname'],
77
+ 'client_port' => row['client_port'],
78
+ 'xact_start' => row['xact_start']&.iso8601(3),
79
+ 'xact_age_s' => row['xact_age_s'],
80
+ 'query_start' => row['query_start']&.iso8601(3),
81
+ 'query_age_s' => row['query_age_s'],
82
+ 'state_change' => row['state_change']&.iso8601(3),
83
+ 'state_age_s' => row['state_age_s'],
84
+ 'state' => row['state'],
85
+ 'query' => row['query'] # This will be stripped, normalized etc
86
+ }
87
+
88
+ # Inject marginalia into record
89
+ parse_marginalia_into_record(record, 'query', true)
90
+
91
+ # Normalize query and fingerprint
92
+ # Note that `record['query']` was updated in previous step
93
+ # To strip off marginalia comments
94
+ record.merge!(fingerprint_query(record['query']))
95
+
96
+ record
97
+ end
98
+
99
+ def fingerprint_query(query)
100
+ # We record the query_length as it will help in understanding whether unparseable
101
+ # queries are truncated.
102
+ record = { 'query_length' => query&.length, 'query' => nil }
103
+
104
+ return record unless query
105
+
106
+ normalized = PgQuery.normalize(query)
107
+ record['query'] = normalized
108
+
109
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
110
+
111
+ record
112
+ rescue PgQuery::ParseError
113
+ record['query_unparseable'] = true
114
+
115
+ record
116
+ end
117
+ end
118
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'fluent/plugin/input'
4
- require 'pg'
3
+ require_relative './polling_pg_input_plugin'
5
4
  require 'pg_query'
6
5
 
7
6
  module Fluent::Plugin
@@ -14,67 +13,22 @@ module Fluent::Plugin
14
13
  # 'query_length' => 58,
15
14
  # 'queryid' => 3239318621761098074
16
15
  # }
17
- class PgStatStatementsInput < Input
16
+ class PgStatStatementsInput < PollingPostgresInputPlugin
18
17
  Fluent::Plugin.register_input('pg_stat_statements', self)
19
18
 
20
- desc 'PostgreSQL host'
21
- config_param :host, :string
22
-
23
- desc 'RDBMS port (default: 5432)'
24
- config_param :port, :integer, default: 5432
25
-
26
- desc 'login user name'
27
- config_param :username, :string, default: nil
28
-
29
- desc 'postgres db'
30
- config_param :dbname, :string, default: nil
31
-
32
- desc 'login password'
33
- config_param :password, :string, default: nil, secret: true
34
-
35
- # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
- # for options
37
- desc 'postgres sslmode'
38
- config_param :sslmode, :string, default: 'prefer'
39
-
40
- desc 'tag'
41
- config_param :tag, :string, default: nil
42
-
43
- desc 'interval in second to run query'
44
- config_param :interval, :time, default: 300
45
-
46
19
  desc 'Name of field to store SQL query fingerprint'
47
20
  config_param :fingerprint_key, :string, default: 'fingerprint'
48
21
 
49
- def start
50
- @stop_flag = false
51
- @thread = Thread.new(&method(:thread_main))
52
- end
22
+ protected
53
23
 
54
- def shutdown
55
- @stop_flag = true
56
-
57
- # Interrupt thread and wait for it to finish
58
- Thread.new { @thread.run } if @thread
59
- @thread.join
60
- end
61
-
62
- def thread_main
63
- until @stop_flag
64
- sleep @interval
65
- break if @stop_flag
66
-
67
- begin
68
- with_connection do |conn|
69
- emit_statements_to_stream(conn)
70
- end
71
- rescue StandardError => e
72
- log.error 'unexpected error', error: e.message, error_class: e.class
73
- log.error_backtrace e.backtrace
74
- end
24
+ def on_poll
25
+ with_connection do |conn|
26
+ emit_statements_to_stream(conn)
75
27
  end
76
28
  end
77
29
 
30
+ public
31
+
78
32
  # Returns a fluentd record for a query row
79
33
  def record_for_row(row)
80
34
  query = row['query']
@@ -97,8 +51,6 @@ module Fluent::Plugin
97
51
  record
98
52
  end
99
53
 
100
- private
101
-
102
54
  # Query the database and emit statements to fluentd router
103
55
  def emit_statements_to_stream(conn)
104
56
  me = Fluent::MultiEventStream.new
@@ -111,26 +63,5 @@ module Fluent::Plugin
111
63
 
112
64
  @router.emit_stream(@tag, me)
113
65
  end
114
-
115
- # Since this query is very infrequent, and it may be communicating directly
116
- # with postgres without pgbouncer, don't use a persistent connection and
117
- # ensure that it is properly closed
118
- def with_connection(&block)
119
- conn = PG.connect(
120
- host: @host,
121
- dbname: @dbname,
122
- sslmode: @sslmode,
123
- user: @username,
124
- password: @password
125
- )
126
- conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
-
128
- begin
129
- block.call(conn)
130
- ensure
131
- # Always close the connection
132
- conn.finish
133
- end
134
- end
135
66
  end
136
67
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/filter'
4
+
5
+ module Fluent::Plugin
6
+ # MarginaliaExtractor provides the parse_marginalia_into_record
7
+ # utility method, useful for extracting marginalia into fluentd records
8
+ module MarginaliaExtractor
9
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
10
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
11
+
12
+ # Injects marginalia into a fluentd record
13
+ def parse_marginalia_into_record(record, key, strip_comment)
14
+ sql = record[key]
15
+ return unless sql
16
+
17
+ comment_match = match_marginalia_comment(sql)
18
+
19
+ return unless comment_match
20
+
21
+ entries = extract_entries(comment_match['comment'])
22
+ parse_entries(entries, key, record)
23
+
24
+ record[key] = comment_match['sql'].strip if strip_comment
25
+ end
26
+
27
+ def match_marginalia_comment(sql)
28
+ matched = MARGINALIA_PREPENDED_REGEXP.match(sql)
29
+
30
+ return matched if matched
31
+
32
+ MARGINALIA_APPENDED_REGEXP.match(sql)
33
+ end
34
+
35
+ def extract_entries(comment)
36
+ comment = scrub_comment(comment)
37
+
38
+ return [] unless comment
39
+
40
+ comment.split(',')
41
+ end
42
+
43
+ def scrub_comment(comment)
44
+ return unless comment
45
+
46
+ comment.strip!
47
+ comment.gsub!(%r{^/\*}, '')
48
+ comment.gsub!(%r{\*/$}, '')
49
+ end
50
+
51
+ def parse_entries(entries, key, record)
52
+ entries.each do |component|
53
+ data = component.split(':', 2)
54
+
55
+ break unless data.length == 2
56
+
57
+ stored_key = store_key(record, key, data[0])
58
+ record[stored_key] = data[1]
59
+ end
60
+ end
61
+
62
+ def store_key(record, key, component_key)
63
+ # In case there is a conflict with the Marginalia key
64
+ # (e.g. `correlation_id`), we use the base key
65
+ # (`sql_correlation_id`) instead.
66
+ if record.key?(component_key)
67
+ "#{key}_#{component_key}"
68
+ else
69
+ component_key
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent::Plugin
8
+ # PollingPostgresInputPlugin is intended to be used as a base class
9
+ # for input plugins that poll postgres.
10
+ #
11
+ # Child classes should implement the `on_poll` method
12
+ class PollingPostgresInputPlugin < Input
13
+ desc 'PostgreSQL host'
14
+ config_param :host, :string
15
+
16
+ desc 'RDBMS port (default: 5432)'
17
+ config_param :port, :integer, default: 5432
18
+
19
+ desc 'login user name'
20
+ config_param :username, :string, default: nil
21
+
22
+ desc 'postgres db'
23
+ config_param :dbname, :string, default: nil
24
+
25
+ desc 'login password'
26
+ config_param :password, :string, default: nil, secret: true
27
+
28
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
29
+ # for options
30
+ desc 'postgres sslmode'
31
+ config_param :sslmode, :string, default: 'prefer'
32
+
33
+ desc 'tag'
34
+ config_param :tag, :string, default: nil
35
+
36
+ desc 'interval in second to run query'
37
+ config_param :interval, :time, default: 300
38
+
39
+ def start
40
+ @stop_flag = false
41
+ @thread = Thread.new(&method(:thread_main))
42
+ end
43
+
44
+ # Fluentd shutdown method, called to terminate and cleanup plugin
45
+ def shutdown
46
+ @stop_flag = true
47
+
48
+ # Interrupt thread and wait for it to finish
49
+ Thread.new { @thread.run } if @thread
50
+ @thread.join
51
+ end
52
+
53
+ # Main polling loop on thread
54
+ def thread_main
55
+ until @stop_flag
56
+ sleep @interval
57
+ break if @stop_flag
58
+
59
+ begin
60
+ on_poll
61
+ rescue StandardError => e
62
+ log.error 'unexpected error', error: e.message, error_class: e.class
63
+ log.error_backtrace e.backtrace
64
+ end
65
+ end
66
+ end
67
+
68
+ protected
69
+
70
+ # Child-classes should implement this method
71
+ def on_poll
72
+ raise 'on_poll must be implemented by descendents of PollingPostgresInputPlugin'
73
+ end
74
+
75
+ # Since this query is very infrequent, and it may be communicating directly
76
+ # with postgres without pgbouncer, don't use a persistent connection and
77
+ # ensure that it is properly closed
78
+ def with_connection(&block)
79
+ conn = PG.connect(
80
+ host: @host,
81
+ dbname: @dbname,
82
+ sslmode: @sslmode,
83
+ user: @username,
84
+ password: @password
85
+ )
86
+
87
+ map = PG::BasicTypeMapForResults.new(conn)
88
+ map.default_type_map = PG::TypeMapAllStrings.new
89
+
90
+ conn.type_map_for_results = map
91
+
92
+ begin
93
+ block.call(conn)
94
+ ensure
95
+ # Always close the connection
96
+ conn.finish
97
+ end
98
+ end
99
+ end
100
+ end
data/test/helper.rb CHANGED
@@ -16,4 +16,6 @@ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
16
16
  require 'fluent/plugin/filter_postgresql_slowlog'
17
17
  require 'fluent/plugin/filter_postgresql_redactor'
18
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/marginalia_extractor'
19
20
  require 'fluent/plugin/in_pg_stat_statements'
21
+ require 'fluent/plugin/in_pg_stat_activity'
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatActivityInputIntegrationTest < Test::Unit::TestCase
6
+ # The default values work with the configuration in .gitlab-ci.yml on the postgres service
7
+ # Override with env vars for local development
8
+ HOST = ENV.fetch('PG_TEST_HOST', 'postgres')
9
+ USERNAME = ENV.fetch('PG_TEST_USER', 'testuser')
10
+ PASSWORD = ENV.fetch('PG_TEST_PASSWORD', 'testpass')
11
+
12
+ def setup
13
+ Fluent::Test.setup
14
+
15
+ @conn = PG.connect(
16
+ host: HOST,
17
+ user: USERNAME,
18
+ password: PASSWORD
19
+ )
20
+ end
21
+
22
+ def teardown
23
+ @conn&.finish
24
+ end
25
+
26
+ VALID_CONFIG = %(
27
+ tag postgres.pg_stat_statements
28
+ host #{HOST}
29
+ username #{USERNAME}
30
+ password #{PASSWORD}
31
+ interval 1
32
+ )
33
+
34
+ INVALID_CONFIG = %(
35
+ host 'invalid_host.dne'
36
+ port 1234
37
+ username #{USERNAME}
38
+ password #{PASSWORD}
39
+ interval 1
40
+ )
41
+
42
+ def create_driver(config)
43
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatActivityInput).configure(config)
44
+ end
45
+
46
+ sub_test_case 'configuration' do
47
+ test 'connects' do
48
+ d = create_driver(VALID_CONFIG)
49
+
50
+ emits = []
51
+ # Wait 50 * 0.05s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
52
+ d.run(num_waits = 50) do
53
+ emits = d.emits
54
+ end
55
+
56
+ assert_false emits.empty?
57
+ end
58
+
59
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
60
+ # fluentd configuration to fail. We would rather retry until we get a connection
61
+ test 'connects for an invalid config' do
62
+ d = create_driver(INVALID_CONFIG)
63
+
64
+ emits = []
65
+ # Wait 50 * 0.05s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
66
+ d.run(num_waits = 50) do
67
+ emits = d.emits
68
+ end
69
+
70
+ assert_true emits.empty?
71
+ end
72
+ end
73
+
74
+ sub_test_case 'execution' do
75
+ test 'connects' do
76
+ d = create_driver(VALID_CONFIG)
77
+
78
+ emits = []
79
+ # Wait 50 * 0.05s; see fluentd/lib/fluent/test/base.rb:79: num_waits.times { sleep 0.05 }
80
+ d.run(num_waits = 50) do
81
+ emits = d.emits
82
+ end
83
+
84
+ first_with_query = emits.find do |event|
85
+ record = event[2]
86
+
87
+ record['usename'] == USERNAME &&
88
+ !record['datid'].nil? &&
89
+ !record['query'].nil? &&
90
+ record['state'] == 'active'
91
+ end
92
+
93
+ assert_false first_with_query.nil?
94
+ record = first_with_query[2]
95
+
96
+ assert_false record['datname'].nil?
97
+ assert_false record['pid'].nil?
98
+ assert_false record['usesysid'].nil?
99
+ assert_false record['application_name'].nil?
100
+ assert_false record['client_addr'].nil?
101
+ assert_false record['client_port'].nil?
102
+ assert_false record['xact_start'].nil?
103
+ assert_false record['xact_age_s'].nil?
104
+ assert_false record['xact_start'].nil?
105
+ assert_false record['xact_age_s'].nil?
106
+ assert_false record['query_start'].nil?
107
+ assert_false record['query_age_s'].nil?
108
+ assert_false record['state_change'].nil?
109
+ assert_false record['state_age_s'].nil?
110
+ assert_false record['query_length'].nil?
111
+ assert_false record['query'].nil?
112
+ assert_false record['fingerprint'].nil?
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+ require 'date'
5
+
6
+ class PgStatActivityInputTest < Test::Unit::TestCase
7
+ def setup
8
+ Fluent::Test.setup
9
+ end
10
+
11
+ CONFIG = %(
12
+ tag postgres.pg_stat_activity
13
+ host localhost
14
+ port 1234
15
+ dbname gitlab
16
+ sslmode require
17
+ username moo
18
+ password secret
19
+ interval 600
20
+ fingerprint_key fingerprint
21
+ )
22
+
23
+ def create_driver
24
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatActivityInput).configure(CONFIG)
25
+ end
26
+
27
+ sub_test_case 'configuration' do
28
+ test 'basic configuration' do
29
+ d = create_driver
30
+
31
+ assert_equal 'postgres.pg_stat_activity', d.instance.tag
32
+ assert_equal 'localhost', d.instance.host
33
+ assert_equal 1234, d.instance.port
34
+ assert_equal 'gitlab', d.instance.dbname
35
+ assert_equal 'require', d.instance.sslmode
36
+ assert_equal 'moo', d.instance.username
37
+ assert_equal 'secret', d.instance.password
38
+ assert_equal 600, d.instance.interval
39
+ assert_equal 'fingerprint', d.instance.fingerprint_key
40
+ end
41
+ end
42
+
43
+ sub_test_case 'execution' do
44
+ test 'sql' do
45
+ d = create_driver
46
+ row = {
47
+ 'datid' => 16384,
48
+ 'datname' => 'testuser',
49
+ 'pid' => 376,
50
+ 'usesysid' => 10,
51
+ 'usename' => 'testuser',
52
+ 'application_name' => 'psql',
53
+ 'client_addr' => '172.17.0.1',
54
+ 'client_hostname' => nil,
55
+ 'client_port' => 60182,
56
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
57
+ 'xact_age_s' => 0.001884,
58
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
59
+ 'query_age_s' => 0.001894,
60
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
61
+ 'state_age_s' => 0.001894,
62
+ 'state' => 'active',
63
+ 'query' => "SELECT * FROM users WHERE user_secret = 's3cr3t'"
64
+ }
65
+
66
+ record = d.instance.record_for_row(row)
67
+
68
+ expected = {
69
+ 'application_name' => 'psql',
70
+ 'client_addr' => '172.17.0.1',
71
+ 'client_hostname' => nil,
72
+ 'client_port' => 60182,
73
+ 'datid' => 16384,
74
+ 'datname' => 'testuser',
75
+ 'fingerprint' => '5c4a61e156c7d822',
76
+ 'pid' => 376,
77
+ 'query' => 'SELECT * FROM users WHERE user_secret = $1', # NOTE: secret removed
78
+ 'query_age_s' => 0.001894,
79
+ 'query_length' => 48,
80
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
81
+ 'state' => 'active',
82
+ 'state_age_s' => 0.001894,
83
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
84
+ 'usename' => 'testuser',
85
+ 'usesysid' => 10,
86
+ 'xact_age_s' => 0.001884,
87
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
88
+ }
89
+
90
+ assert_equal expected, record
91
+ end
92
+
93
+ # This test mostly checks that the code is nil safe
94
+ test 'nil query' do
95
+ d = create_driver
96
+ record = d.instance.record_for_row({})
97
+
98
+ expected = {
99
+ 'application_name' => nil,
100
+ 'client_addr' => nil,
101
+ 'client_hostname' => nil,
102
+ 'client_port' => nil,
103
+ 'datid' => nil,
104
+ 'datname' => nil,
105
+ 'pid' => nil,
106
+ 'query' => nil,
107
+ 'query_age_s' => nil,
108
+ 'query_length' => nil,
109
+ 'query_start' => nil,
110
+ 'state' => nil,
111
+ 'state_age_s' => nil,
112
+ 'state_change' => nil,
113
+ 'usename' => nil,
114
+ 'usesysid' => nil,
115
+ 'xact_age_s' => nil,
116
+ 'xact_start' => nil
117
+ }
118
+
119
+ assert_equal expected, record
120
+ end
121
+
122
+ test 'unparseable sql' do
123
+ d = create_driver
124
+ row = {
125
+ 'datid' => 16384,
126
+ 'datname' => 'testuser',
127
+ 'pid' => 376,
128
+ 'usesysid' => 10,
129
+ 'usename' => 'testuser',
130
+ 'application_name' => 'psql',
131
+ 'client_addr' => '172.17.0.1',
132
+ 'client_hostname' => nil,
133
+ 'client_port' => 60182,
134
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
135
+ 'xact_age_s' => 0.001884,
136
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
137
+ 'query_age_s' => 0.001894,
138
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
139
+ 'state_age_s' => 0.001894,
140
+ 'state' => 'active',
141
+ 'query' => "SELECT * FROM users WHERE user_se="
142
+ }
143
+
144
+ record = d.instance.record_for_row(row)
145
+
146
+ expected = {
147
+ 'application_name' => 'psql',
148
+ 'client_addr' => '172.17.0.1',
149
+ 'client_hostname' => nil,
150
+ 'client_port' => 60182,
151
+ 'datid' => 16384,
152
+ 'datname' => 'testuser',
153
+ 'pid' => 376,
154
+ 'query' => nil,
155
+ 'query_age_s' => 0.001894,
156
+ 'query_length' => 34,
157
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
158
+ 'query_unparseable' => true,
159
+ 'state' => 'active',
160
+ 'state_age_s' => 0.001894,
161
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
162
+ 'usename' => 'testuser',
163
+ 'usesysid' => 10,
164
+ 'xact_age_s' => 0.001884,
165
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
166
+ }
167
+
168
+ assert_equal expected, record
169
+ end
170
+
171
+ test 'marginalia prepended' do
172
+ d = create_driver
173
+ row = {
174
+ 'datid' => 16384,
175
+ 'datname' => 'testuser',
176
+ 'pid' => 376,
177
+ 'usesysid' => 10,
178
+ 'usename' => 'testuser',
179
+ 'application_name' => 'psql',
180
+ 'client_addr' => '172.17.0.1',
181
+ 'client_hostname' => nil,
182
+ 'client_port' => 60182,
183
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
184
+ 'xact_age_s' => 0.001884,
185
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
186
+ 'query_age_s' => 0.001894,
187
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
188
+ 'state_age_s' => 0.001894,
189
+ 'state' => 'active',
190
+ 'query' => "/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT * FROM users WHERE user_secret = 's3cr3t'"
191
+ }
192
+
193
+ record = d.instance.record_for_row(row)
194
+
195
+ expected = {
196
+ 'application' => 'web',
197
+ 'application_name' => 'psql',
198
+ 'client_addr' => '172.17.0.1',
199
+ 'client_hostname' => nil,
200
+ 'client_port' => 60182,
201
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
202
+ 'datid' => 16384,
203
+ 'datname' => 'testuser',
204
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
205
+ 'fingerprint' => '5c4a61e156c7d822',
206
+ 'pid' => 376,
207
+ 'query' => 'SELECT * FROM users WHERE user_secret = $1', # Secret removed
208
+ 'query_age_s' => 0.001894,
209
+ 'query_length' => 48,
210
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
211
+ 'state' => 'active',
212
+ 'state_age_s' => 0.001894,
213
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
214
+ 'usename' => 'testuser',
215
+ 'usesysid' => 10,
216
+ 'xact_age_s' => 0.001884,
217
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
218
+ }
219
+
220
+ assert_equal expected, record
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class Marginalia < Test::Unit::TestCase
6
+ include Fluent::Plugin::MarginaliaExtractor
7
+
8
+ def test_parse(sql, record, key, strip_comment, expected)
9
+ record[key] = sql
10
+ parse_marginalia_into_record(record, key, strip_comment)
11
+ assert_equal(expected, record)
12
+ end
13
+
14
+ test 'no marginalia' do
15
+ sql = 'SELECT * FROM projects'
16
+ expected = { 'sql' => 'SELECT * FROM projects' }
17
+ test_parse(sql, {}, 'sql', true, expected)
18
+ end
19
+
20
+ test 'normal comment appended' do
21
+ sql = 'SELECT COUNT(*) FROM "projects" /* this is just a comment */'
22
+ expected = {
23
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
24
+ }
25
+ test_parse(sql, {}, 'sql', true, expected)
26
+ end
27
+
28
+ test 'marginalia appended for sidekiq' do
29
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/'
30
+ expected = {
31
+ 'application' => 'sidekiq',
32
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
33
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
34
+ 'job_class' => 'Geo::MetricsUpdateWorker',
35
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
36
+ }
37
+ test_parse(sql, {}, 'sql', true, expected)
38
+ end
39
+
40
+ test 'marginalia appended for web' do
41
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/'
42
+ expected = {
43
+ 'application' => 'web',
44
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
45
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
46
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
47
+ }
48
+ test_parse(sql, {}, 'sql', true, expected)
49
+ end
50
+
51
+ test 'normal comment prepended' do
52
+ sql = '/* this is just a comment */ SELECT COUNT(*) FROM "projects"'
53
+ expected = {
54
+ "sql" => 'SELECT COUNT(*) FROM "projects"'
55
+ }
56
+ test_parse(sql, {}, 'sql', true, expected)
57
+ end
58
+
59
+ test 'marginalia prepended for sidekiq' do
60
+ sql = '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"'
61
+ expected = {
62
+ 'application' => 'sidekiq',
63
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
64
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
65
+ 'job_class' => 'Geo::MetricsUpdateWorker',
66
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
67
+ }
68
+ test_parse(sql, {}, 'sql', true, expected)
69
+ end
70
+
71
+ test 'marginalia prepended for web' do
72
+ sql = '/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT COUNT(*) FROM "projects"'
73
+ expected = {
74
+ 'application' => 'web',
75
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
76
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
77
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
78
+ }
79
+ test_parse(sql, {}, 'sql', true, expected)
80
+ end
81
+
82
+ test 'marginalia prepended for web, comment_strip disabled' do
83
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/'
84
+ expected = {
85
+ 'application' => 'sidekiq',
86
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
87
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
88
+ 'job_class' => 'Geo::MetricsUpdateWorker',
89
+ 'sql' => sql
90
+ }
91
+ test_parse(sql, { 'sql' => sql }, 'sql', false, expected)
92
+ end
93
+
94
+ test 'avoid clash' do
95
+ sql = '/*clash_key:bbb*/ SELECT COUNT(*) FROM "projects"'
96
+ expected = {
97
+ 'clash_key' => 'aaa',
98
+ 'sql_clash_key' => 'bbb',
99
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
100
+ }
101
+ test_parse(sql, { 'clash_key' => 'aaa' }, 'sql', true, expected)
102
+ end
103
+ end
@@ -0,0 +1,22 @@
1
+ #!/bin/sh
2
+
3
+ # This script is used by the end-to-end fluent test.
4
+ # See the docker-compose.yml for more details
5
+
6
+ cleanup() { # remove everything under the log directory
7
+ echo "# removing all logs"
8
+ rm -rf /var/log/pg/*
9
+ }
10
+
11
+ die() { # clean up, print the message given as $1, then exit with status 1
12
+ cleanup
13
+ echo "$1"
14
+ exit 1
15
+ }
16
+
17
+ cleanup
18
+ echo "# sleeping 10, awaiting logs"
19
+ sleep 10;
20
+ # find exits 0 even when nothing matches; grep prints the paths and fails on empty output, which triggers die
21
+ find /var/log/pg/ -name "pg_stat_statements.*.log" | grep . || die "No pg_stat_statements files created"
22
+ cleanup
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-12 00:00:00.000000000 Z
11
+ date: 2021-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -50,14 +50,14 @@ dependencies:
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '2.0'
53
+ version: '2.1'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '2.0'
60
+ version: '2.1'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: rake
63
63
  requirement: !ruby/object:Gem::Requirement
@@ -93,6 +93,7 @@ executables: []
93
93
  extensions: []
94
94
  extra_rdoc_files: []
95
95
  files:
96
+ - ".gitignore"
96
97
  - ".gitlab-ci.yml"
97
98
  - Dockerfile
98
99
  - Gemfile
@@ -105,15 +106,22 @@ files:
105
106
  - lib/fluent/plugin/filter_marginalia.rb
106
107
  - lib/fluent/plugin/filter_postgresql_redactor.rb
107
108
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
109
+ - lib/fluent/plugin/in_pg_stat_activity.rb
108
110
  - lib/fluent/plugin/in_pg_stat_statements.rb
111
+ - lib/fluent/plugin/marginalia_extractor.rb
109
112
  - lib/fluent/plugin/parser_multiline_csv.rb
113
+ - lib/fluent/plugin/polling_pg_input_plugin.rb
110
114
  - sql/create_extension.sql
111
115
  - test/helper.rb
116
+ - test/plugin/itest_in_pg_stat_activity.rb
112
117
  - test/plugin/itest_in_pg_stat_statements.rb
113
118
  - test/plugin/test_filter_marginalia.rb
114
119
  - test/plugin/test_filter_postgresql_redactor.rb
115
120
  - test/plugin/test_filter_postgresql_slowlog.rb
121
+ - test/plugin/test_in_pg_stat_activity.rb
116
122
  - test/plugin/test_in_pg_stat_statements.rb
123
+ - test/plugin/test_marginalia_extractor.rb
124
+ - test/verify-docker-compose.sh
117
125
  homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
118
126
  licenses: []
119
127
  metadata: {}
@@ -138,8 +146,12 @@ specification_version: 4
138
146
  summary: fluentd plugins to work with PostgreSQL CSV logs
139
147
  test_files:
140
148
  - test/helper.rb
149
+ - test/plugin/itest_in_pg_stat_activity.rb
141
150
  - test/plugin/itest_in_pg_stat_statements.rb
142
151
  - test/plugin/test_filter_marginalia.rb
143
152
  - test/plugin/test_filter_postgresql_redactor.rb
144
153
  - test/plugin/test_filter_postgresql_slowlog.rb
154
+ - test/plugin/test_in_pg_stat_activity.rb
145
155
  - test/plugin/test_in_pg_stat_statements.rb
156
+ - test/plugin/test_marginalia_extractor.rb
157
+ - test/verify-docker-compose.sh