fluent-plugin-uri-cma-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +12 -0
- data/Gemfile +4 -0
- data/README.md +104 -0
- data/Rakefile +10 -0
- data/fluent-plugin-uri-parser.gemspec +29 -0
- data/lib/fluent/plugin/filter_query_string_parser.rb +79 -0
- data/lib/fluent/plugin/filter_uri_parser.rb +63 -0
- data/wercker.yml +25 -0
- metadata +127 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8c10992d5f384ddb2778a65b1a0da29102ad683e735ea02f3f124068f7db9a09
|
4
|
+
data.tar.gz: 2526dc16b541e89ef61d961446e41d5c4d3066fa00b82dfc2c629d8c5ec6da4b
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ebaad4f371e53c04c8b1162b96651fbb2151a27df3d36aef31af86e4190b2b4a781beef1d3924006c6725c680460e5c27371aeaa3aa80c812cafd1a067b63b18
|
7
|
+
data.tar.gz: 537f66fc203fcd709e305864052ce042682c00bd66ef76c2b632ce016d5de5e2db83138543d4857ff67eb9f72837662a0d25488f6685ced478898f89345bd0bf
|
data/.gitignore
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
# fluent-plugin-uri-cma-parser
|
2
|
+
|
3
|
+
[](https://badge.fury.io/rb/fluent-plugin-uri-parser) [](https://app.wercker.com/project/bykey/a735d29143f3a1a727fc65653bc81e2a)
|
4
|
+
|
5
|
+
This is a Fluentd plugin to parse uri and query string in log messages.
|
6
|
+
|
7
|
+
## Requirements
|
8
|
+
|
9
|
+
| fluent-plugin-uri-cma-parser | fluentd | ruby |
|
10
|
+
|---------------------------|------------|--------|
|
11
|
+
| >= 0.3.0 | >= v0.14.0 | >= 2.1 |
|
12
|
+
| < 0.2.0 | >= v0.12.0 | >= 1.9 |
|
13
|
+
|
14
|
+
## Installation
|
15
|
+
|
16
|
+
``` shell
|
17
|
+
$ gem install fluent-plugin-uri-cma-parser -v "~> 0.2" # for fluentd v0.12 or later
|
18
|
+
$ gem install fluent-plugin-uri-cma-parser # for fluentd v0.14 or later
|
19
|
+
```
|
20
|
+
|
21
|
+
## Component
|
22
|
+
|
23
|
+
### URIParserFilter
|
24
|
+
|
25
|
+
This is a Fluentd filter plugin that parses the URI in log messages and re-emits the records.
|
26
|
+
|
27
|
+
### QueryStringParserFilter
|
28
|
+
|
29
|
+
This is a Fluentd filter plugin that parses the query string in log messages and re-emits the records.
|
30
|
+
|
31
|
+
## Configuration
|
32
|
+
|
33
|
+
```
|
34
|
+
<filter>
|
35
|
+
@type uri_parser
|
36
|
+
key_name uri
|
37
|
+
inject_key_prefix parsed
|
38
|
+
# hash_value_field parsed
|
39
|
+
# suppress_parse_error_log false
|
40
|
+
# ignore_key_not_exist false
|
41
|
+
# ignore_nil false
|
42
|
+
|
43
|
+
out_key_scheme scheme
|
44
|
+
out_key_host host
|
45
|
+
out_key_port port
|
46
|
+
out_key_path path
|
47
|
+
out_key_query query
|
48
|
+
out_key_fragment fragment
|
49
|
+
</filter>
|
50
|
+
# input string of data: {"uri": "http://example.com/path?foo=bar#t=1"}
|
51
|
+
# output data: {"parsed.scheme":"http","parsed.host":"example.com","parsed.port":80,"parsed.path":"/path","parsed.query":"foo=bar","parsed.fragment":"t=1"}
|
52
|
+
|
53
|
+
<filter>
|
54
|
+
@type query_string_parser
|
55
|
+
key_name parsed.query
|
56
|
+
hash_value_field query
|
57
|
+
# inject_key_prefix query
|
58
|
+
# suppress_parse_error_log false
|
59
|
+
# ignore_key_not_exist false
|
60
|
+
</filter>
|
61
|
+
# input string of data: {"parsed.query": "foo=bar"}
|
62
|
+
# output data: {"query":{"foo":"bar"}}
|
63
|
+
|
64
|
+
```
|
65
|
+
|
66
|
+
**key_name (Required)**
|
67
|
+
|
68
|
+
Key of the value to be parsed in the record.
|
69
|
+
|
70
|
+
**hash_value_field (Default: '')**
|
71
|
+
|
72
|
+
If a value is set, the parsed values are stored as a hash under the key named by this value.
|
73
|
+
|
74
|
+
**inject_key_prefix (Default: '')**
|
75
|
+
|
76
|
+
If a value is set, it is prepended as a prefix to every key produced by parsing.
|
77
|
+
|
78
|
+
**suppress_parse_error_log (Default: false)**
|
79
|
+
|
80
|
+
If set to `true`, no error log is output even if parsing fails.
|
81
|
+
|
82
|
+
**ignore_key_not_exist (Default: false)**
|
83
|
+
|
84
|
+
If set to `true` and the field specified by `key_name` does not exist, the record is not emitted to the next stage. That means the record is lost at this point.
|
85
|
+
|
86
|
+
**ignore_nil (Default: false)**
|
87
|
+
|
88
|
+
If set to `true`, keys whose parsed value is nil are excluded from the output.
|
89
|
+
|
90
|
+
**multi_value_params (Default: false)**
|
91
|
+
|
92
|
+
If set to `true`, then resulting values would be arrays containing
|
93
|
+
potentially multiple values of a given parameter.
|
94
|
+
|
95
|
+
## Development
|
96
|
+
|
97
|
+
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
98
|
+
|
99
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
100
|
+
|
101
|
+
## Contributing
|
102
|
+
|
103
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/daichirata/fluent-plugin-uri-cma-parser.
|
104
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "fluent-plugin-uri-cma-parser"
|
7
|
+
spec.version = "0.1.0"
|
8
|
+
spec.authors = ["Philippe BENAOUN"]
|
9
|
+
spec.email = ["philippe.benaoun@gmail.com"]
|
10
|
+
spec.license = "Apache-2.0"
|
11
|
+
|
12
|
+
spec.summary = "This is a Fluentd plugin to parse uri and query string in log messages."
|
13
|
+
spec.description = "This is a Fluentd plugin to parse uri and query string in log messages."
|
14
|
+
spec.homepage = "https://gitlab.com/pbenaoun/fluent-plugin-uri-cma-parser"
|
15
|
+
|
16
|
+
##spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
+
spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
|
18
|
+
`git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
19
|
+
end
|
20
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
|
+
spec.require_paths = ["lib"]
|
22
|
+
|
23
|
+
spec.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
|
24
|
+
spec.add_runtime_dependency "addressable"
|
25
|
+
|
26
|
+
spec.add_development_dependency "test-unit"
|
27
|
+
spec.add_development_dependency "bundler", "~> 2.0"
|
28
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
29
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
module Fluent
|
2
|
+
module Plugin
|
3
|
+
class QueryStringParserFilter < Filter
|
4
|
+
Fluent::Plugin.register_filter("query_string_parser", self)
|
5
|
+
|
6
|
+
config_param :out_key_path, :string, default: 'path'
|
7
|
+
config_param :out_key_query, :string, default: 'params'
|
8
|
+
config_param :out_key_fragment, :string, default: 'fragment'
|
9
|
+
config_param :key_name, :string
|
10
|
+
config_param :hash_value_field, :string, default: 'indexing'
|
11
|
+
config_param :inject_key_prefix, :string, default: nil
|
12
|
+
config_param :suppress_parse_error_log, :bool, default: false
|
13
|
+
config_param :ignore_key_not_exist, :bool, default: false
|
14
|
+
config_param :emit_invalid_record_to_error, :bool, default: true
|
15
|
+
config_param :multi_value_params, :bool, default: false
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
super
|
19
|
+
require "addressable/uri"
|
20
|
+
end
|
21
|
+
|
22
|
+
# Removes, on every line of +line+, everything from the start of the line up
# to and including the last "?" on that line, then trims surrounding
# whitespace from the result.
#
# The pattern is a regex literal so it is not rebuilt for every record, and
# the input is coerced with +to_s+ so a nil value yields "" instead of raising.
#
# @param line [String, nil] text to clean (the query part of a URL in #filter)
# @return [String] the text after the last "?", stripped
def cleaner_prefix(line)
  line.to_s.gsub(/^.*\?/, '').strip
end
|
25
|
+
|
26
|
+
# Tells whether +line+ contains something that looks like a web URL: either
# an http(s):// URL or a bare "www." host, each followed by a dot and at
# least two non-whitespace characters.
#
# The pattern is not anchored, so a URL embedded in a longer string also
# counts. It is written as a regex literal so it is not rebuilt for every
# record. Non-String input (nil, numbers, ...) is coerced with +to_s+ and
# therefore reported as not valid instead of raising.
#
# @param line [Object] candidate value, normally a String
# @return [Boolean] true when the URL pattern is found, false otherwise
def valid_url(line)
  url_pattern = %r{
    https?://(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}
    | www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}
    | https?://(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}
    | www\.[a-zA-Z0-9]+\.[^\s]{2,}
  }x

  !url_pattern.match(line.to_s).nil?
end
|
33
|
+
|
34
|
+
# Parses the query string of the URL stored under +key_name+ and merges the
# decoded parameters into the record.
#
# Behavior:
# * When the key is missing, optionally emits an error event and returns nil
#   (if +ignore_key_not_exist+ is set) or the untouched record.
# * When the value does not pass #valid_url, a warning is logged and the
#   record is returned untouched.
# * When the URL has no query string (or it decodes to no parameters), the
#   record is returned untouched. Previously this case called +empty?+ on
#   nil and logged a misleading "parse failed" warning for every such URL.
# * Otherwise the parameters are merged, as arrays per key when
#   +multi_value_params+ is set, with +inject_key_prefix+ prepended to each
#   key if configured, and nested under +hash_value_field+ if configured.
#
# Any error raised while parsing is logged (unless +suppress_parse_error_log+
# is set) and the record is returned as it was.
#
# @param tag [String] event tag, used only for the error event
# @param time [Object] event time, used only for the error event
# @param record [Hash] the event record
# @return [Hash, nil] the (possibly enriched) record, or nil
def filter(tag, time, record)
  raw_value = record[@key_name]

  if raw_value.nil?
    if @emit_invalid_record_to_error
      router.emit_error_event(tag, time, record, ArgumentError.new("#{@key_name} does not exist"))
    end
    return @ignore_key_not_exist ? nil : record
  end

  begin
    if valid_url(raw_value)
      uri = Addressable::URI.parse(raw_value)
      # As before, the query is only considered when out_key_query is configured.
      query = @out_key_query ? uri.query : nil
      # An absent query means "no parameters" rather than an error.
      params = query.nil? ? [] : Addressable::URI.form_unencode(cleaner_prefix(query))

      unless params.empty?
        values =
          if @multi_value_params
            # Plain Hash (no default proc) collecting every value per key.
            params.each_with_object({}) { |(key, value), grouped| (grouped[key] ||= []) << value }
          else
            Hash[params]
          end

        if @inject_key_prefix
          values = Hash[values.map { |k, v| [@inject_key_prefix + k, v] }]
        end

        r = @hash_value_field ? { @hash_value_field => values } : values
        record = record.merge(r)
      end
    else
      log.warn "url not valid: #{raw_value}"
    end
  rescue => e
    log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
  end

  record
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
module Fluent
|
2
|
+
module Plugin
|
3
|
+
class URIParserFilter < Filter
|
4
|
+
Fluent::Plugin.register_filter("uri_parser", self)
|
5
|
+
|
6
|
+
config_param :key_name, :string
|
7
|
+
config_param :hash_value_field, :string, default: nil
|
8
|
+
config_param :inject_key_prefix, :string, default: nil
|
9
|
+
config_param :suppress_parse_error_log, :bool, default: false
|
10
|
+
config_param :ignore_key_not_exist, :bool, default: false
|
11
|
+
config_param :ignore_nil, :bool, default: false
|
12
|
+
config_param :emit_invalid_record_to_error, :bool, default: true
|
13
|
+
|
14
|
+
config_param :out_key_scheme, :string, default: nil
|
15
|
+
config_param :out_key_host, :string, default: nil
|
16
|
+
config_param :out_key_port, :string, default: nil
|
17
|
+
config_param :out_key_path, :string, default: nil
|
18
|
+
config_param :out_key_query, :string, default: nil
|
19
|
+
config_param :out_key_fragment, :string, default: nil
|
20
|
+
|
21
|
+
def initialize
|
22
|
+
super
|
23
|
+
require "addressable/uri"
|
24
|
+
end
|
25
|
+
|
26
|
+
# Splits the URI stored under +key_name+ into its components and merges the
# requested ones into the record.
#
# Only components whose +out_key_*+ option is configured are extracted. With
# +ignore_nil+ set, components whose value is nil are left out. Keys get
# +inject_key_prefix+ prepended when configured, and the result is nested
# under +hash_value_field+ when configured. A missing key optionally emits an
# error event and yields nil (with +ignore_key_not_exist+) or the untouched
# record. Parse errors are logged unless +suppress_parse_error_log+ is set.
#
# @param tag [String] event tag, used only for the error event
# @param time [Object] event time, used only for the error event
# @param record [Hash] the event record
# @return [Hash, nil] the (possibly enriched) record, or nil
def filter(tag, time, record)
  source = record[@key_name]

  if source.nil?
    router.emit_error_event(tag, time, record, ArgumentError.new("#{@key_name} does not exist")) if @emit_invalid_record_to_error
    return nil if @ignore_key_not_exist
    return record
  end

  begin
    parsed = Addressable::URI.parse(source)

    # Output key => reader on the parsed URI, in the order they are emitted.
    components = [
      [@out_key_scheme,   :scheme],
      [@out_key_host,     :host],
      [@out_key_port,     :inferred_port],
      [@out_key_path,     :path],
      [@out_key_query,    :query],
      [@out_key_fragment, :fragment]
    ]

    extracted = {}
    components.each do |out_key, reader|
      extracted[out_key] = parsed.public_send(reader) if out_key
    end
    extracted.delete_if { |_, component| component.nil? } if @ignore_nil

    unless extracted.empty?
      if @inject_key_prefix
        extracted = extracted.each_with_object({}) do |(name, component), prefixed|
          prefixed[@inject_key_prefix + name] = component
        end
      end

      addition = @hash_value_field ? { @hash_value_field => extracted } : extracted
      record = record.merge(addition)
    end
  rescue => e
    log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
  end

  record
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/wercker.yml
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
box: ruby:2.3.0
|
2
|
+
# Build definition
|
3
|
+
build:
|
4
|
+
steps:
|
5
|
+
- script:
|
6
|
+
name: update bundler
|
7
|
+
code: gem update bundler
|
8
|
+
|
9
|
+
# A step that executes `bundle install` command
|
10
|
+
- bundle-install:
|
11
|
+
jobs: 4
|
12
|
+
|
13
|
+
# A custom script step, name value is used in the UI
|
14
|
+
# and the code value contains the command that get executed
|
15
|
+
- script:
|
16
|
+
name: echo ruby information
|
17
|
+
code: |
|
18
|
+
echo "ruby version $(ruby --version) running"
|
19
|
+
echo "from location $(which ruby)"
|
20
|
+
echo -p "gem list: $(gem list)"
|
21
|
+
|
22
|
+
# Add more steps here:
|
23
|
+
- script:
|
24
|
+
name: test
|
25
|
+
code: bundle exec rake test
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fluent-plugin-uri-cma-parser
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Philippe BENAOUN
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-03-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: fluentd
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 0.14.0
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '2'
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 0.14.0
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '2'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: addressable
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
type: :runtime
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '0'
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: test-unit
|
49
|
+
requirement: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
requirements:
|
58
|
+
- - ">="
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: '0'
|
61
|
+
- !ruby/object:Gem::Dependency
|
62
|
+
name: bundler
|
63
|
+
requirement: !ruby/object:Gem::Requirement
|
64
|
+
requirements:
|
65
|
+
- - "~>"
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '2.0'
|
68
|
+
type: :development
|
69
|
+
prerelease: false
|
70
|
+
version_requirements: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.0'
|
75
|
+
- !ruby/object:Gem::Dependency
|
76
|
+
name: rake
|
77
|
+
requirement: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '10.0'
|
82
|
+
type: :development
|
83
|
+
prerelease: false
|
84
|
+
version_requirements: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '10.0'
|
89
|
+
description: This is a Fluentd plugin to parse uri and query string in log messages.
|
90
|
+
email:
|
91
|
+
- philippe.benaoun@gmail.com
|
92
|
+
executables: []
|
93
|
+
extensions: []
|
94
|
+
extra_rdoc_files: []
|
95
|
+
files:
|
96
|
+
- ".gitignore"
|
97
|
+
- Gemfile
|
98
|
+
- README.md
|
99
|
+
- Rakefile
|
100
|
+
- fluent-plugin-uri-parser.gemspec
|
101
|
+
- lib/fluent/plugin/filter_query_string_parser.rb
|
102
|
+
- lib/fluent/plugin/filter_uri_parser.rb
|
103
|
+
- wercker.yml
|
104
|
+
homepage: https://gitlab.com/pbenaoun/fluent-plugin-uri-cma-parser
|
105
|
+
licenses:
|
106
|
+
- Apache-2.0
|
107
|
+
metadata: {}
|
108
|
+
post_install_message:
|
109
|
+
rdoc_options: []
|
110
|
+
require_paths:
|
111
|
+
- lib
|
112
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - ">="
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '0'
|
117
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: '0'
|
122
|
+
requirements: []
|
123
|
+
rubygems_version: 3.1.6
|
124
|
+
signing_key:
|
125
|
+
specification_version: 4
|
126
|
+
summary: This is a Fluentd plugin to parse uri and query string in log messages.
|
127
|
+
test_files: []
|