fluent-plugin-postgresql-csvlog 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d61fc31718e43c6d1dff46139a1b03d56384405d45b69c9772dc6f7b6a66dbf
4
- data.tar.gz: 5d2d23a4a7b5f277b19f181aa515ab74718f6ef93824e2c958264d9bbdf9c9aa
3
+ metadata.gz: e1cc2264997378ec801c6623768d57ca27ce1c28d6ac8ffcb6a905add2ce9f16
4
+ data.tar.gz: 79aab5d187209db5790f9bca181ea0ca8c909c808809590ad46cef0f6645902c
5
5
  SHA512:
6
- metadata.gz: f2106f60749b6fa8fc931ccd3d85f51595b0ec60eb9f425935072b1749ed068e8a068c6d513f1557a20e8c5a0613acad80c6690c7ce2aa20cca3027d686c388a
7
- data.tar.gz: 9ceef623cbd5256e047dea817d487d2711cc8b387ff068a10913100fe649e77174a600ac019b4729403390152c1a3e6f162accf4cc50f020561032b6493d965f
6
+ metadata.gz: e38e47908ac33706c4b6679fabc97a395bb63f8a110cb6a836c93f984dbfaf1f77f2204209cfe8a0609ddf969ab960cd855ce363398388084a0026e5df0c4970
7
+ data.tar.gz: 94505a6d7ceee368c22b0c32081b85aaf215d28217ceed1ff8f58bab840367f5afe19ed725056394e57fd1d6ebfe3ae98fba66b39eb781cfa2f1f80a602385c6
data/.gitignore ADDED
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/.gitlab-ci.yml CHANGED
@@ -27,3 +27,16 @@ itest:
27
27
  cache:
28
28
  paths:
29
29
  - vendor/ruby
30
+
31
+ end_to_end_verification_test:
32
+ image: docker:19.03.12
33
+ services:
34
+ - docker:19.03.12-dind
35
+ tags:
36
+ - gitlab-org-docker
37
+ variables:
38
+ DOCKER_TLS_CERTDIR: ""
39
+ before_script:
40
+ - apk add --no-cache docker-compose
41
+ script:
42
+ - docker-compose run --rm verifier
data/README.md CHANGED
@@ -7,6 +7,8 @@ parse PostgreSQL CSV log files and extract slow log information:
7
7
  - `PostgreSQLSlowLog`: Extracts slow log entries into `duration_s` and `statement` fields
8
8
  - `PostgreSQLRedactor`: Normalizes the SQL query and redacts sensitive information
9
9
  - `Marginalia`: Parses [Marginalia comments](https://github.com/basecamp/marginalia) into key-value pairs and stores them
10
+ - `PgStatStatementsInput`: polls the [`pg_stat_statements`](https://www.postgresql.org/docs/current/pgstatstatements.html) postgres plugin and emits fluentd events.
11
+ - `PgStatActivityInput`: polls the [`postges activity monitor`](https://www.postgresql.org/docs/current/monitoring-stats.html) and emits fluentd events.
10
12
 
11
13
  ## Installation
12
14
 
@@ -72,3 +74,17 @@ ingest and parse PostgreSQL CSV logs:
72
74
  </format>
73
75
  </match>
74
76
  ```
77
+
78
+ ## Developing `fluent-plugin-postgresql-csvlog`
79
+
80
+ To develop and debug locally, there is a `Dockerfile` and `docker-compose.yml` that will setup a local environment,
81
+ complete with Postgres, suitable for testing purposes.
82
+
83
+ 1. `docker compose rm verifier --rm` - test the current configuration
84
+ 1. `docker compose up`
85
+
86
+ ### Releasing a new version
87
+
88
+ 1. Update the version in `fluent-plugin-postgresql-csvlog.gemspec`.
89
+ 1. Create a merge request and merge the changes to `master`.
90
+ 1. Run `bundle exec rake release`.
data/docker-compose.yml CHANGED
@@ -1,14 +1,9 @@
1
1
  # Docker Compose setup useful for testing and development purposes
2
- version: "3.9"
2
+ version: "3.3"
3
3
  services:
4
- fluentd:
5
- build: .
6
- links:
7
- - postgres
8
- entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
9
4
  postgres:
10
5
  image: postgres
11
- restart: always
6
+ restart: "no"
12
7
  environment:
13
8
  - POSTGRES_USER=testuser
14
9
  - POSTGRES_PASSWORD=testpass
@@ -17,3 +12,26 @@ services:
17
12
  command: postgres -c shared_preload_libraries=pg_stat_statements -c pg_stat_statements.track=all
18
13
  volumes:
19
14
  - ./sql/create_extension.sql:/docker-entrypoint-initdb.d/create_extension.sql
15
+
16
+ fluentd:
17
+ build: .
18
+ restart: "no"
19
+ links:
20
+ - postgres
21
+ entrypoint: /usr/bin/fluentd -vvv -c /src/example-fluentd.conf
22
+ volumes:
23
+ - ./example-fluentd.conf:/src/example-fluentd.conf
24
+ - log-volume:/var/log/pg/
25
+
26
+ verifier:
27
+ image: alpine:3.13
28
+ restart: "no"
29
+ links:
30
+ - fluentd
31
+ command: /bin/sh /src/verify-docker-compose.sh
32
+ volumes:
33
+ - ./test/verify-docker-compose.sh:/src/verify-docker-compose.sh
34
+ - log-volume:/var/log/pg/
35
+
36
+ volumes:
37
+ log-volume:
data/example-fluentd.conf CHANGED
@@ -8,5 +8,18 @@
8
8
  </source>
9
9
 
10
10
  <match postgres.pg_stat_statements>
11
- @type stdout
11
+ @type file
12
+ path /var/log/pg/pg_stat_statements
13
+ time_slice_format %Y%m%d%H%M%S
14
+ flush_interval 1s
15
+ utc
12
16
  </match>
17
+
18
+ <source>
19
+ @type pg_stat_activity
20
+ tag postgres.pg_stat_activity
21
+ host postgres
22
+ username testuser
23
+ password testpass
24
+ interval 1
25
+ </source>
@@ -2,7 +2,7 @@ $:.push File.expand_path('lib', __dir__)
2
2
 
3
3
  Gem::Specification.new do |s|
4
4
  s.name = 'fluent-plugin-postgresql-csvlog'
5
- s.version = '0.3.2'
5
+ s.version = '0.4.0'
6
6
  s.authors = ['stanhu']
7
7
  s.email = ['stanhu@gmail.com']
8
8
  s.homepage = 'https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog'
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
16
16
 
17
17
  s.add_dependency 'fluentd', ['>= 1.0', '< 2']
18
18
  s.add_dependency 'pg', '~> 1.1'
19
- s.add_dependency 'pg_query', '~> 2.0'
19
+ s.add_dependency 'pg_query', '~> 2.1'
20
20
 
21
21
  s.add_development_dependency 'rake'
22
22
  s.add_development_dependency 'test-unit', '~> 3.2'
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'fluent/plugin/filter'
4
+ require_relative './marginalia_extractor'
4
5
 
5
6
  module Fluent
6
7
  module Plugin
@@ -11,6 +12,7 @@ module Fluent
11
12
  # /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"
12
13
  #
13
14
  class Marginalia < Filter
15
+ include MarginaliaExtractor
14
16
  Fluent::Plugin.register_filter('marginalia', self)
15
17
 
16
18
  desc 'Field to parse for Marginalia comments (key1:value1,key2:value2)'
@@ -19,77 +21,11 @@ module Fluent
19
21
  desc 'Whether to strip the comment from the record specified by key'
20
22
  config_param :strip_comment, :bool, default: true
21
23
 
22
- MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
23
- MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
24
-
25
24
  def filter(_tag, _time, record)
26
- parse_comments(record)
25
+ parse_marginalia_into_record(record, @key, @strip_comment)
27
26
 
28
27
  record
29
28
  end
30
-
31
- private
32
-
33
- def parse_comments(record)
34
- sql = record[@key]
35
-
36
- return unless sql
37
-
38
- comment_match = match_marginalia_comment(sql)
39
-
40
- return unless comment_match
41
-
42
- entries = extract_entries(comment_match['comment'])
43
- parse_entries(entries, record)
44
-
45
- record[@key] = comment_match['sql'].strip if @strip_comment
46
- end
47
-
48
- def match_marginalia_comment(sql)
49
- matched = MARGINALIA_PREPENDED_REGEXP.match(sql)
50
-
51
- return matched if matched
52
-
53
- MARGINALIA_APPENDED_REGEXP.match(sql)
54
- end
55
-
56
- def extract_entries(comment)
57
- comment = scrub_comment(comment)
58
-
59
- return [] unless comment
60
-
61
- comment.split(',')
62
- end
63
-
64
- def scrub_comment(comment)
65
- return unless comment
66
-
67
- comment.strip!
68
- comment.gsub!(%r{^/\*}, '')
69
- comment.gsub!(%r{\*/$}, '')
70
- end
71
-
72
- def parse_entries(entries, record)
73
- entries.each do |component|
74
- data = component.split(':', 2)
75
-
76
- break unless data.length == 2
77
-
78
- stored_key = store_key(record, data[0])
79
- record[stored_key] = data[1]
80
- end
81
- end
82
-
83
- def store_key(record, component_key)
84
- # In case there is a conflict with the Marginalia key
85
- # (e.g. `correlation_id`), we use the base key
86
- # (`sql_correlation_id`) instead.
87
- if record.key?(component_key)
88
- "#{@key}_#{component_key}"
89
- else
90
- component_key
91
- end
92
- end
93
29
  end
94
30
  end
95
31
  end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './polling_pg_input_plugin'
4
+ require 'pg_query'
5
+ require_relative './marginalia_extractor'
6
+ require 'time'
7
+
8
+ module Fluent::Plugin
9
+ # PgStatActivityInput polls the `pg_stat_activity` table
10
+ # emitting normalized versions of the queries currently running on
11
+ # the postgres server.
12
+ # Fingerprints of the queries are also included for easier aggregation
13
+ class PgStatActivityInput < PollingPostgresInputPlugin
14
+ include MarginaliaExtractor
15
+ Fluent::Plugin.register_input('pg_stat_activity', self)
16
+
17
+ ACTIVITY_QUERY = <<-SQL
18
+ SELECT
19
+ datid,
20
+ datname,
21
+ pid,
22
+ usesysid,
23
+ usename,
24
+ application_name,
25
+ host(client_addr) as client_addr,
26
+ client_hostname,
27
+ client_port,
28
+ xact_start,
29
+ extract(epoch from clock_timestamp() - xact_start) xact_age_s,
30
+ query_start,
31
+ extract(epoch from clock_timestamp() - query_start) query_age_s,
32
+ state_change,
33
+ extract(epoch from clock_timestamp() - state_change) state_age_s,
34
+ state,
35
+ query
36
+ FROM pg_stat_activity
37
+ WHERE usename IS NOT NULL
38
+ SQL
39
+
40
+ desc 'Name of field to store SQL query fingerprint'
41
+ config_param :fingerprint_key, :string, default: 'fingerprint'
42
+
43
+ protected
44
+
45
+ def on_poll
46
+ with_connection do |conn|
47
+ emit_activity_to_stream(conn)
48
+ end
49
+ end
50
+
51
+ public
52
+
53
+ # Query the database and emit statements to fluentd router
54
+ def emit_activity_to_stream(conn)
55
+ me = Fluent::MultiEventStream.new
56
+
57
+ now = Fluent::Engine.now
58
+ conn.exec(ACTIVITY_QUERY).each do |row|
59
+ record = record_for_row(row)
60
+ me.add(now, record)
61
+ end
62
+
63
+ @router.emit_stream(@tag, me)
64
+ end
65
+
66
+ # Returns a fluentd record for a query row
67
+ def record_for_row(row)
68
+ record = {
69
+ 'datid' => row['datid'],
70
+ 'datname' => row['datname'],
71
+ 'pid' => row['pid'],
72
+ 'usesysid' => row['usesysid'],
73
+ 'usename' => row['usename'],
74
+ 'application_name' => row['application_name'],
75
+ 'client_addr' => row['client_addr'],
76
+ 'client_hostname' => row['client_hostname'],
77
+ 'client_port' => row['client_port'],
78
+ 'xact_start' => row['xact_start']&.iso8601(3),
79
+ 'xact_age_s' => row['xact_age_s'],
80
+ 'query_start' => row['query_start']&.iso8601(3),
81
+ 'query_age_s' => row['query_age_s'],
82
+ 'state_change' => row['state_change']&.iso8601(3),
83
+ 'state_age_s' => row['state_age_s'],
84
+ 'state' => row['state'],
85
+ 'query' => row['query'] # This will be stripped, normalized etc
86
+ }
87
+
88
+ # Inject marginalia into record
89
+ parse_marginalia_into_record(record, 'query', true)
90
+
91
+ # Normalize query and fingerprint
92
+ # Note that `record['query']` was updated in previous step
93
+ # To strip off marginalia comments
94
+ record.merge!(fingerprint_query(record['query']))
95
+
96
+ record
97
+ end
98
+
99
+ def fingerprint_query(query)
100
+ # We record the query_length as it will help in understanding whether unparseable
101
+ # queries are truncated.
102
+ record = { 'query_length' => query&.length, 'query' => nil }
103
+
104
+ return record unless query
105
+
106
+ normalized = PgQuery.normalize(query)
107
+ record['query'] = normalized
108
+
109
+ record[@fingerprint_key] = PgQuery.parse(normalized).fingerprint if @fingerprint_key
110
+
111
+ record
112
+ rescue PgQuery::ParseError
113
+ record['query_unparseable'] = true
114
+
115
+ record
116
+ end
117
+ end
118
+ end
@@ -1,7 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'fluent/plugin/input'
4
- require 'pg'
3
+ require_relative './polling_pg_input_plugin'
5
4
  require 'pg_query'
6
5
 
7
6
  module Fluent::Plugin
@@ -14,67 +13,22 @@ module Fluent::Plugin
14
13
  # 'query_length' => 58,
15
14
  # 'queryid' => 3239318621761098074
16
15
  # }
17
- class PgStatStatementsInput < Input
16
+ class PgStatStatementsInput < PollingPostgresInputPlugin
18
17
  Fluent::Plugin.register_input('pg_stat_statements', self)
19
18
 
20
- desc 'PostgreSQL host'
21
- config_param :host, :string
22
-
23
- desc 'RDBMS port (default: 5432)'
24
- config_param :port, :integer, default: 5432
25
-
26
- desc 'login user name'
27
- config_param :username, :string, default: nil
28
-
29
- desc 'postgres db'
30
- config_param :dbname, :string, default: nil
31
-
32
- desc 'login password'
33
- config_param :password, :string, default: nil, secret: true
34
-
35
- # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
36
- # for options
37
- desc 'postgres sslmode'
38
- config_param :sslmode, :string, default: 'prefer'
39
-
40
- desc 'tag'
41
- config_param :tag, :string, default: nil
42
-
43
- desc 'interval in second to run query'
44
- config_param :interval, :time, default: 300
45
-
46
19
  desc 'Name of field to store SQL query fingerprint'
47
20
  config_param :fingerprint_key, :string, default: 'fingerprint'
48
21
 
49
- def start
50
- @stop_flag = false
51
- @thread = Thread.new(&method(:thread_main))
52
- end
22
+ protected
53
23
 
54
- def shutdown
55
- @stop_flag = true
56
-
57
- # Interrupt thread and wait for it to finish
58
- Thread.new { @thread.run } if @thread
59
- @thread.join
60
- end
61
-
62
- def thread_main
63
- until @stop_flag
64
- sleep @interval
65
- break if @stop_flag
66
-
67
- begin
68
- with_connection do |conn|
69
- emit_statements_to_stream(conn)
70
- end
71
- rescue StandardError => e
72
- log.error 'unexpected error', error: e.message, error_class: e.class
73
- log.error_backtrace e.backtrace
74
- end
24
+ def on_poll
25
+ with_connection do |conn|
26
+ emit_statements_to_stream(conn)
75
27
  end
76
28
  end
77
29
 
30
+ public
31
+
78
32
  # Returns a fluentd record for a query row
79
33
  def record_for_row(row)
80
34
  query = row['query']
@@ -97,8 +51,6 @@ module Fluent::Plugin
97
51
  record
98
52
  end
99
53
 
100
- private
101
-
102
54
  # Query the database and emit statements to fluentd router
103
55
  def emit_statements_to_stream(conn)
104
56
  me = Fluent::MultiEventStream.new
@@ -111,26 +63,5 @@ module Fluent::Plugin
111
63
 
112
64
  @router.emit_stream(@tag, me)
113
65
  end
114
-
115
- # Since this query is very infrequent, and it may be communicating directly
116
- # with postgres without pgbouncer, don't use a persistent connection and
117
- # ensure that it is properly closed
118
- def with_connection(&block)
119
- conn = PG.connect(
120
- host: @host,
121
- dbname: @dbname,
122
- sslmode: @sslmode,
123
- user: @username,
124
- password: @password
125
- )
126
- conn.type_map_for_results = PG::BasicTypeMapForResults.new conn
127
-
128
- begin
129
- block.call(conn)
130
- ensure
131
- # Always close the connection
132
- conn.finish
133
- end
134
- end
135
66
  end
136
67
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/filter'
4
+
5
+ module Fluent::Plugin
6
+ # MarginaliaExtractor provides the parse_marginalia_into_record
7
+ # utility method, useful for extracting marginalia into fluentd records
8
+ module MarginaliaExtractor
9
+ MARGINALIA_PREPENDED_REGEXP = %r{^(?<comment>/\*.*\*/)(?<sql>.*)}m.freeze
10
+ MARGINALIA_APPENDED_REGEXP = %r{(?<sql>.*)(?<comment>/\*.*\*/)$}m.freeze
11
+
12
+ # Injects marginalia into a fluentd record
13
+ def parse_marginalia_into_record(record, key, strip_comment)
14
+ sql = record[key]
15
+ return unless sql
16
+
17
+ comment_match = match_marginalia_comment(sql)
18
+
19
+ return unless comment_match
20
+
21
+ entries = extract_entries(comment_match['comment'])
22
+ parse_entries(entries, key, record)
23
+
24
+ record[key] = comment_match['sql'].strip if strip_comment
25
+ end
26
+
27
+ def match_marginalia_comment(sql)
28
+ matched = MARGINALIA_PREPENDED_REGEXP.match(sql)
29
+
30
+ return matched if matched
31
+
32
+ MARGINALIA_APPENDED_REGEXP.match(sql)
33
+ end
34
+
35
+ def extract_entries(comment)
36
+ comment = scrub_comment(comment)
37
+
38
+ return [] unless comment
39
+
40
+ comment.split(',')
41
+ end
42
+
43
+ def scrub_comment(comment)
44
+ return unless comment
45
+
46
+ comment.strip!
47
+ comment.gsub!(%r{^/\*}, '')
48
+ comment.gsub!(%r{\*/$}, '')
49
+ end
50
+
51
+ def parse_entries(entries, key, record)
52
+ entries.each do |component|
53
+ data = component.split(':', 2)
54
+
55
+ break unless data.length == 2
56
+
57
+ stored_key = store_key(record, key, data[0])
58
+ record[stored_key] = data[1]
59
+ end
60
+ end
61
+
62
+ def store_key(record, key, component_key)
63
+ # In case there is a conflict with the Marginalia key
64
+ # (e.g. `correlation_id`), we use the base key
65
+ # (`sql_correlation_id`) instead.
66
+ if record.key?(component_key)
67
+ "#{key}_#{component_key}"
68
+ else
69
+ component_key
70
+ end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'fluent/plugin/input'
4
+ require 'pg'
5
+ require 'pg_query'
6
+
7
+ module Fluent::Plugin
8
+ # PollingPostgresInputPlugin is intended to be used as an base class
9
+ # for input plugins that poll postgres.
10
+ #
11
+ # Child classes should implement the `on_poll` method
12
+ class PollingPostgresInputPlugin < Input
13
+ desc 'PostgreSQL host'
14
+ config_param :host, :string
15
+
16
+ desc 'RDBMS port (default: 5432)'
17
+ config_param :port, :integer, default: 5432
18
+
19
+ desc 'login user name'
20
+ config_param :username, :string, default: nil
21
+
22
+ desc 'postgres db'
23
+ config_param :dbname, :string, default: nil
24
+
25
+ desc 'login password'
26
+ config_param :password, :string, default: nil, secret: true
27
+
28
+ # See https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNECT-SSLMODE
29
+ # for options
30
+ desc 'postgres sslmode'
31
+ config_param :sslmode, :string, default: 'prefer'
32
+
33
+ desc 'tag'
34
+ config_param :tag, :string, default: nil
35
+
36
+ desc 'interval in second to run query'
37
+ config_param :interval, :time, default: 300
38
+
39
+ def start
40
+ @stop_flag = false
41
+ @thread = Thread.new(&method(:thread_main))
42
+ end
43
+
44
+ # Fluentd shutdown method, called to terminate and cleanup plugin
45
+ def shutdown
46
+ @stop_flag = true
47
+
48
+ # Interrupt thread and wait for it to finish
49
+ Thread.new { @thread.run } if @thread
50
+ @thread.join
51
+ end
52
+
53
+ # Main polling loop on thread
54
+ def thread_main
55
+ until @stop_flag
56
+ sleep @interval
57
+ break if @stop_flag
58
+
59
+ begin
60
+ on_poll
61
+ rescue StandardError => e
62
+ log.error 'unexpected error', error: e.message, error_class: e.class
63
+ log.error_backtrace e.backtrace
64
+ end
65
+ end
66
+ end
67
+
68
+ protected
69
+
70
+ # Child-classes should implement this method
71
+ def on_poll
72
+ raise 'on_poll must be implemented by descendents of PollingPostgresInputPlugin'
73
+ end
74
+
75
+ # Since this query is very infrequent, and it may be communicating directly
76
+ # with postgres without pgbouncer, don't use a persistent connection and
77
+ # ensure that it is properly closed
78
+ def with_connection(&block)
79
+ conn = PG.connect(
80
+ host: @host,
81
+ dbname: @dbname,
82
+ sslmode: @sslmode,
83
+ user: @username,
84
+ password: @password
85
+ )
86
+
87
+ map = PG::BasicTypeMapForResults.new(conn)
88
+ map.default_type_map = PG::TypeMapAllStrings.new
89
+
90
+ conn.type_map_for_results = map
91
+
92
+ begin
93
+ block.call(conn)
94
+ ensure
95
+ # Always close the connection
96
+ conn.finish
97
+ end
98
+ end
99
+ end
100
+ end
data/test/helper.rb CHANGED
@@ -16,4 +16,6 @@ Test::Unit::TestCase.extend(Fluent::Test::Helpers)
16
16
  require 'fluent/plugin/filter_postgresql_slowlog'
17
17
  require 'fluent/plugin/filter_postgresql_redactor'
18
18
  require 'fluent/plugin/filter_marginalia'
19
+ require 'fluent/plugin/marginalia_extractor'
19
20
  require 'fluent/plugin/in_pg_stat_statements'
21
+ require 'fluent/plugin/in_pg_stat_activity'
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class PgStatActivityInputIntegrationTest < Test::Unit::TestCase
6
+ # The defaults values work with the configuration in .gitlab-ci.yml on the postgres service
7
+ # Override with env vars for local development
8
+ HOST = ENV.fetch('PG_TEST_HOST', 'postgres')
9
+ USERNAME = ENV.fetch('PG_TEST_USER', 'testuser')
10
+ PASSWORD = ENV.fetch('PG_TEST_PASSWORD', 'testpass')
11
+
12
+ def setup
13
+ Fluent::Test.setup
14
+
15
+ @conn = PG.connect(
16
+ host: HOST,
17
+ user: USERNAME,
18
+ password: PASSWORD
19
+ )
20
+ end
21
+
22
+ def teardown
23
+ @conn&.finish
24
+ end
25
+
26
+ VALID_CONFIG = %(
27
+ tag postgres.pg_stat_statements
28
+ host #{HOST}
29
+ username #{USERNAME}
30
+ password #{PASSWORD}
31
+ interval 1
32
+ )
33
+
34
+ INVALID_CONFIG = %(
35
+ host 'invalid_host.dne'
36
+ port 1234
37
+ username #{USERNAME}
38
+ password #{PASSWORD}
39
+ interval 1
40
+ )
41
+
42
+ def create_driver(config)
43
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatActivityInput).configure(config)
44
+ end
45
+
46
+ sub_test_case 'configuration' do
47
+ test 'connects' do
48
+ d = create_driver(VALID_CONFIG)
49
+
50
+ emits = []
51
+ # wait 50 * 0.05, "see fluentd/lib/fluent/test/base.rb:79 num_waits.times { sleep 0.05 }
52
+ d.run(num_waits = 50) do
53
+ emits = d.emits
54
+ end
55
+
56
+ assert_false emits.empty?
57
+ end
58
+
59
+ # Why do we have this test? If postgres is still starting up, we don't want to cause the
60
+ # the fluentd configuration to fail. We would rather retry until we get a connection
61
+ test 'connects for an invalid config' do
62
+ d = create_driver(INVALID_CONFIG)
63
+
64
+ emits = []
65
+ # wait 50 * 0.05, "see fluentd/lib/fluent/test/base.rb:79 num_waits.times { sleep 0.05 }
66
+ d.run(num_waits = 50) do
67
+ emits = d.emits
68
+ end
69
+
70
+ assert_true emits.empty?
71
+ end
72
+ end
73
+
74
+ sub_test_case 'execution' do
75
+ test 'connects' do
76
+ d = create_driver(VALID_CONFIG)
77
+
78
+ emits = []
79
+ # wait 50 * 0.05, "see fluentd/lib/fluent/test/base.rb:79 num_waits.times { sleep 0.05 }
80
+ d.run(num_waits = 50) do
81
+ emits = d.emits
82
+ end
83
+
84
+ first_with_query = emits.find do |event|
85
+ record = event[2]
86
+
87
+ record['usename'] == USERNAME &&
88
+ !record['datid'].nil? &&
89
+ !record['query'].nil? &&
90
+ record['state'] == 'active'
91
+ end
92
+
93
+ assert_false first_with_query.nil?
94
+ record = first_with_query[2]
95
+
96
+ assert_false record['datname'].nil?
97
+ assert_false record['pid'].nil?
98
+ assert_false record['usesysid'].nil?
99
+ assert_false record['application_name'].nil?
100
+ assert_false record['client_addr'].nil?
101
+ assert_false record['client_port'].nil?
102
+ assert_false record['xact_start'].nil?
103
+ assert_false record['xact_age_s'].nil?
104
+ assert_false record['xact_start'].nil?
105
+ assert_false record['xact_age_s'].nil?
106
+ assert_false record['query_start'].nil?
107
+ assert_false record['query_age_s'].nil?
108
+ assert_false record['state_change'].nil?
109
+ assert_false record['state_age_s'].nil?
110
+ assert_false record['query_length'].nil?
111
+ assert_false record['query'].nil?
112
+ assert_false record['fingerprint'].nil?
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+ require 'date'
5
+
6
+ class PgStatActivityInputTest < Test::Unit::TestCase
7
+ def setup
8
+ Fluent::Test.setup
9
+ end
10
+
11
+ CONFIG = %(
12
+ tag postgres.pg_stat_activity
13
+ host localhost
14
+ port 1234
15
+ dbname gitlab
16
+ sslmode require
17
+ username moo
18
+ password secret
19
+ interval 600
20
+ fingerprint_key fingerprint
21
+ )
22
+
23
+ def create_driver
24
+ Fluent::Test::InputTestDriver.new(Fluent::Plugin::PgStatActivityInput).configure(CONFIG)
25
+ end
26
+
27
+ sub_test_case 'configuration' do
28
+ test 'basic configuration' do
29
+ d = create_driver
30
+
31
+ assert_equal 'postgres.pg_stat_activity', d.instance.tag
32
+ assert_equal 'localhost', d.instance.host
33
+ assert_equal 1234, d.instance.port
34
+ assert_equal 'gitlab', d.instance.dbname
35
+ assert_equal 'require', d.instance.sslmode
36
+ assert_equal 'moo', d.instance.username
37
+ assert_equal 'secret', d.instance.password
38
+ assert_equal 600, d.instance.interval
39
+ assert_equal 'fingerprint', d.instance.fingerprint_key
40
+ end
41
+ end
42
+
43
+ sub_test_case 'execution' do
44
+ test 'sql' do
45
+ d = create_driver
46
+ row = {
47
+ 'datid' => 16384,
48
+ 'datname' => 'testuser',
49
+ 'pid' => 376,
50
+ 'usesysid' => 10,
51
+ 'usename' => 'testuser',
52
+ 'application_name' => 'psql',
53
+ 'client_addr' => '172.17.0.1',
54
+ 'client_hostname' => nil,
55
+ 'client_port' => 60182,
56
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
57
+ 'xact_age_s' => 0.001884,
58
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
59
+ 'query_age_s' => 0.001894,
60
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
61
+ 'state_age_s' => 0.001894,
62
+ 'state' => 'active',
63
+ 'query' => "SELECT * FROM users WHERE user_secret = 's3cr3t'"
64
+ }
65
+
66
+ record = d.instance.record_for_row(row)
67
+
68
+ expected = {
69
+ 'application_name' => 'psql',
70
+ 'client_addr' => '172.17.0.1',
71
+ 'client_hostname' => nil,
72
+ 'client_port' => 60182,
73
+ 'datid' => 16384,
74
+ 'datname' => 'testuser',
75
+ 'fingerprint' => '5c4a61e156c7d822',
76
+ 'pid' => 376,
77
+ 'query' => 'SELECT * FROM users WHERE user_secret = $1', # NOTE: secret removed
78
+ 'query_age_s' => 0.001894,
79
+ 'query_length' => 48,
80
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
81
+ 'state' => 'active',
82
+ 'state_age_s' => 0.001894,
83
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
84
+ 'usename' => 'testuser',
85
+ 'usesysid' => 10,
86
+ 'xact_age_s' => 0.001884,
87
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
88
+ }
89
+
90
+ assert_equal expected, record
91
+ end
92
+
93
+ # This test mostly checks that the code is nil safe
94
+ test 'nil query' do
95
+ d = create_driver
96
+ record = d.instance.record_for_row({})
97
+
98
+ expected = {
99
+ 'application_name' => nil,
100
+ 'client_addr' => nil,
101
+ 'client_hostname' => nil,
102
+ 'client_port' => nil,
103
+ 'datid' => nil,
104
+ 'datname' => nil,
105
+ 'pid' => nil,
106
+ 'query' => nil,
107
+ 'query_age_s' => nil,
108
+ 'query_length' => nil,
109
+ 'query_start' => nil,
110
+ 'state' => nil,
111
+ 'state_age_s' => nil,
112
+ 'state_change' => nil,
113
+ 'usename' => nil,
114
+ 'usesysid' => nil,
115
+ 'xact_age_s' => nil,
116
+ 'xact_start' => nil
117
+ }
118
+
119
+ assert_equal expected, record
120
+ end
121
+
122
+ test 'unparseable sql' do
123
+ d = create_driver
124
+ row = {
125
+ 'datid' => 16384,
126
+ 'datname' => 'testuser',
127
+ 'pid' => 376,
128
+ 'usesysid' => 10,
129
+ 'usename' => 'testuser',
130
+ 'application_name' => 'psql',
131
+ 'client_addr' => '172.17.0.1',
132
+ 'client_hostname' => nil,
133
+ 'client_port' => 60182,
134
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
135
+ 'xact_age_s' => 0.001884,
136
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
137
+ 'query_age_s' => 0.001894,
138
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
139
+ 'state_age_s' => 0.001894,
140
+ 'state' => 'active',
141
+ 'query' => "SELECT * FROM users WHERE user_se="
142
+ }
143
+
144
+ record = d.instance.record_for_row(row)
145
+
146
+ expected = {
147
+ 'application_name' => 'psql',
148
+ 'client_addr' => '172.17.0.1',
149
+ 'client_hostname' => nil,
150
+ 'client_port' => 60182,
151
+ 'datid' => 16384,
152
+ 'datname' => 'testuser',
153
+ 'pid' => 376,
154
+ 'query' => nil,
155
+ 'query_age_s' => 0.001894,
156
+ 'query_length' => 34,
157
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
158
+ 'query_unparseable' => true,
159
+ 'state' => 'active',
160
+ 'state_age_s' => 0.001894,
161
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
162
+ 'usename' => 'testuser',
163
+ 'usesysid' => 10,
164
+ 'xact_age_s' => 0.001884,
165
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
166
+ }
167
+
168
+ assert_equal expected, record
169
+ end
170
+
171
+ test 'marginalia prepended' do
172
+ d = create_driver
173
+ row = {
174
+ 'datid' => 16384,
175
+ 'datname' => 'testuser',
176
+ 'pid' => 376,
177
+ 'usesysid' => 10,
178
+ 'usename' => 'testuser',
179
+ 'application_name' => 'psql',
180
+ 'client_addr' => '172.17.0.1',
181
+ 'client_hostname' => nil,
182
+ 'client_port' => 60182,
183
+ 'xact_start' => Time.parse('2021-07-23 12:55:25 +0000'),
184
+ 'xact_age_s' => 0.001884,
185
+ 'query_start' => Time.parse('2021-07-23 12:55:25 +0000'),
186
+ 'query_age_s' => 0.001894,
187
+ 'state_change' => Time.parse('2021-07-23 12:55:25 +0000'),
188
+ 'state_age_s' => 0.001894,
189
+ 'state' => 'active',
190
+ 'query' => "/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT * FROM users WHERE user_secret = 's3cr3t'"
191
+ }
192
+
193
+ record = d.instance.record_for_row(row)
194
+
195
+ expected = {
196
+ 'application' => 'web',
197
+ 'application_name' => 'psql',
198
+ 'client_addr' => '172.17.0.1',
199
+ 'client_hostname' => nil,
200
+ 'client_port' => 60182,
201
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
202
+ 'datid' => 16384,
203
+ 'datname' => 'testuser',
204
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
205
+ 'fingerprint' => '5c4a61e156c7d822',
206
+ 'pid' => 376,
207
+ 'query' => 'SELECT * FROM users WHERE user_secret = $1', # Secret removed
208
+ 'query_age_s' => 0.001894,
209
+ 'query_length' => 48,
210
+ 'query_start' => '2021-07-23T12:55:25.000+00:00',
211
+ 'state' => 'active',
212
+ 'state_age_s' => 0.001894,
213
+ 'state_change' => '2021-07-23T12:55:25.000+00:00',
214
+ 'usename' => 'testuser',
215
+ 'usesysid' => 10,
216
+ 'xact_age_s' => 0.001884,
217
+ 'xact_start' => '2021-07-23T12:55:25.000+00:00'
218
+ }
219
+
220
+ assert_equal expected, record
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../helper'
4
+
5
+ class Marginalia < Test::Unit::TestCase
6
+ include Fluent::Plugin::MarginaliaExtractor
7
+
8
+ def test_parse(sql, record, key, strip_comment, expected)
9
+ record[key] = sql
10
+ parse_marginalia_into_record(record, key, strip_comment)
11
+ assert_equal(expected, record)
12
+ end
13
+
14
+ test 'no marginalia' do
15
+ sql = 'SELECT * FROM projects'
16
+ expected = { 'sql' => 'SELECT * FROM projects' }
17
+ test_parse(sql, {}, 'sql', true, expected)
18
+ end
19
+
20
+ test 'normal comment appended' do
21
+ sql = 'SELECT COUNT(*) FROM "projects" /* this is just a comment */'
22
+ expected = {
23
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
24
+ }
25
+ test_parse(sql, {}, 'sql', true, expected)
26
+ end
27
+
28
+ test 'marginalia appended for sidekiq' do
29
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/'
30
+ expected = {
31
+ 'application' => 'sidekiq',
32
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
33
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
34
+ 'job_class' => 'Geo::MetricsUpdateWorker',
35
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
36
+ }
37
+ test_parse(sql, {}, 'sql', true, expected)
38
+ end
39
+
40
+ test 'marginalia appended for web' do
41
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/'
42
+ expected = {
43
+ 'application' => 'web',
44
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
45
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
46
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
47
+ }
48
+ test_parse(sql, {}, 'sql', true, expected)
49
+ end
50
+
51
+ test 'normal comment prepended' do
52
+ sql = '/* this is just a comment */ SELECT COUNT(*) FROM "projects"'
53
+ expected = {
54
+ "sql" => 'SELECT COUNT(*) FROM "projects"'
55
+ }
56
+ test_parse(sql, {}, 'sql', true, expected)
57
+ end
58
+
59
+ test 'marginalia prepended for sidekiq' do
60
+ sql = '/*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/ SELECT COUNT(*) FROM "projects"'
61
+ expected = {
62
+ 'application' => 'sidekiq',
63
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
64
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
65
+ 'job_class' => 'Geo::MetricsUpdateWorker',
66
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
67
+ }
68
+ test_parse(sql, {}, 'sql', true, expected)
69
+ end
70
+
71
+ test 'marginalia prepended for web' do
72
+ sql = '/*application:web,correlation_id:01F1D2T1SC9DM82A4865ATG1CP,endpoint_id:POST /api/:version/groups/:id/-/packages/mavenpath/:file_name*/ SELECT COUNT(*) FROM "projects"'
73
+ expected = {
74
+ 'application' => 'web',
75
+ 'correlation_id' => '01F1D2T1SC9DM82A4865ATG1CP',
76
+ 'endpoint_id' => 'POST /api/:version/groups/:id/-/packages/mavenpath/:file_name',
77
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
78
+ }
79
+ test_parse(sql, {}, 'sql', true, expected)
80
+ end
81
+
82
+ test 'marginalia prepended for web, comment_strip disabled' do
83
+ sql = 'SELECT COUNT(*) FROM "projects" /*application:sidekiq,correlation_id:d67cae54c169e0cab7d73389e2934f0e,jid:52a1c8a9e4c555ea573f20f0,job_class:Geo::MetricsUpdateWorker*/'
84
+ expected = {
85
+ 'application' => 'sidekiq',
86
+ 'correlation_id' => 'd67cae54c169e0cab7d73389e2934f0e',
87
+ 'jid' => '52a1c8a9e4c555ea573f20f0',
88
+ 'job_class' => 'Geo::MetricsUpdateWorker',
89
+ 'sql' => sql
90
+ }
91
+ test_parse(sql, { 'sql' => sql }, 'sql', false, expected)
92
+ end
93
+
94
+ test 'avoid clash' do
95
+ sql = '/*clash_key:bbb*/ SELECT COUNT(*) FROM "projects"'
96
+ expected = {
97
+ 'clash_key' => 'aaa',
98
+ 'sql_clash_key' => 'bbb',
99
+ 'sql' => 'SELECT COUNT(*) FROM "projects"'
100
+ }
101
+ test_parse(sql, { 'clash_key' => 'aaa' }, 'sql', true, expected)
102
+ end
103
+ end
@@ -0,0 +1,22 @@
1
+ #!/bin/sh
2
+
3
+ # This script is use by the end-to-end fluent test
4
+ # See the docker-compose.yml for more details
5
+
6
+ cleanup() {
7
+ echo "# removing all logs"
8
+ rm -rf /var/log/pg/*
9
+ }
10
+
11
+ die() {
12
+ cleanup
13
+ echo "$1"
14
+ exit 1
15
+ }
16
+
17
+ cleanup
18
+ echo "# sleeping 10, awaiting logs"
19
+ sleep 10;
20
+ find /var/log/pg/ -name "pg_stat_statements.*.log" || die "No pg_stat_statements files created"
21
+
22
+ cleanup
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-postgresql-csvlog
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.2
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - stanhu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-07-12 00:00:00.000000000 Z
11
+ date: 2021-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -50,14 +50,14 @@ dependencies:
50
50
  requirements:
51
51
  - - "~>"
52
52
  - !ruby/object:Gem::Version
53
- version: '2.0'
53
+ version: '2.1'
54
54
  type: :runtime
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  requirements:
58
58
  - - "~>"
59
59
  - !ruby/object:Gem::Version
60
- version: '2.0'
60
+ version: '2.1'
61
61
  - !ruby/object:Gem::Dependency
62
62
  name: rake
63
63
  requirement: !ruby/object:Gem::Requirement
@@ -93,6 +93,7 @@ executables: []
93
93
  extensions: []
94
94
  extra_rdoc_files: []
95
95
  files:
96
+ - ".gitignore"
96
97
  - ".gitlab-ci.yml"
97
98
  - Dockerfile
98
99
  - Gemfile
@@ -105,15 +106,22 @@ files:
105
106
  - lib/fluent/plugin/filter_marginalia.rb
106
107
  - lib/fluent/plugin/filter_postgresql_redactor.rb
107
108
  - lib/fluent/plugin/filter_postgresql_slowlog.rb
109
+ - lib/fluent/plugin/in_pg_stat_activity.rb
108
110
  - lib/fluent/plugin/in_pg_stat_statements.rb
111
+ - lib/fluent/plugin/marginalia_extractor.rb
109
112
  - lib/fluent/plugin/parser_multiline_csv.rb
113
+ - lib/fluent/plugin/polling_pg_input_plugin.rb
110
114
  - sql/create_extension.sql
111
115
  - test/helper.rb
116
+ - test/plugin/itest_in_pg_stat_activity.rb
112
117
  - test/plugin/itest_in_pg_stat_statements.rb
113
118
  - test/plugin/test_filter_marginalia.rb
114
119
  - test/plugin/test_filter_postgresql_redactor.rb
115
120
  - test/plugin/test_filter_postgresql_slowlog.rb
121
+ - test/plugin/test_in_pg_stat_activity.rb
116
122
  - test/plugin/test_in_pg_stat_statements.rb
123
+ - test/plugin/test_marginalia_extractor.rb
124
+ - test/verify-docker-compose.sh
117
125
  homepage: https://gitlab.com/gitlab-org/fluent-plugins/fluent-plugin-postgresql-csvlog
118
126
  licenses: []
119
127
  metadata: {}
@@ -138,8 +146,12 @@ specification_version: 4
138
146
  summary: fluentd plugins to work with PostgreSQL CSV logs
139
147
  test_files:
140
148
  - test/helper.rb
149
+ - test/plugin/itest_in_pg_stat_activity.rb
141
150
  - test/plugin/itest_in_pg_stat_statements.rb
142
151
  - test/plugin/test_filter_marginalia.rb
143
152
  - test/plugin/test_filter_postgresql_redactor.rb
144
153
  - test/plugin/test_filter_postgresql_slowlog.rb
154
+ - test/plugin/test_in_pg_stat_activity.rb
145
155
  - test/plugin/test_in_pg_stat_statements.rb
156
+ - test/plugin/test_marginalia_extractor.rb
157
+ - test/verify-docker-compose.sh