fluent-plugin-uri-cma-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8c10992d5f384ddb2778a65b1a0da29102ad683e735ea02f3f124068f7db9a09
4
+ data.tar.gz: 2526dc16b541e89ef61d961446e41d5c4d3066fa00b82dfc2c629d8c5ec6da4b
5
+ SHA512:
6
+ metadata.gz: ebaad4f371e53c04c8b1162b96651fbb2151a27df3d36aef31af86e4190b2b4a781beef1d3924006c6725c680460e5c27371aeaa3aa80c812cafd1a067b63b18
7
+ data.tar.gz: 537f66fc203fcd709e305864052ce042682c00bd66ef76c2b632ce016d5de5e2db83138543d4857ff67eb9f72837662a0d25488f6685ced478898f89345bd0bf
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ /vendor/
11
+ .idea
12
+ *.iml
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-uri-parser.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,104 @@
1
+ # fluent-plugin-uri-cma-parser
2
+
3
+ [![Gem Version](https://badge.fury.io/rb/fluent-plugin-uri-parser.svg)](https://badge.fury.io/rb/fluent-plugin-uri-parser) [![wercker status](https://app.wercker.com/status/a735d29143f3a1a727fc65653bc81e2a/s "wercker status")](https://app.wercker.com/project/bykey/a735d29143f3a1a727fc65653bc81e2a)
4
+
5
+ This is a Fluentd plugin to parse uri and query string in log messages.
6
+
7
+ ## Requirements
8
+
9
+ | fluent-plugin-uri-cma-parser | fluentd | ruby |
10
+ |---------------------------|------------|--------|
11
+ | >= 0.3.0 | >= v0.14.0 | >= 2.1 |
12
+ | < 0.2.0 | >= v0.12.0 | >= 1.9 |
13
+
14
+ ## Installation
15
+
16
+ ``` shell
17
+ $ gem install fluent-plugin-uri-cma-parser -v "~> 0.2" # for fluentd v0.12 or later
18
+ $ gem install fluent-plugin-uri-cma-parser # for fluentd v0.14 or later
19
+ ```
20
+
21
+ ## Component
22
+
23
+ ### URIParserFilter
24
+
25
+ This is a Fluentd plugin that parses and filters URIs in log messages and re-emits them.
26
+
27
+ ### QueryStringParserFilter
28
+
29
+ This is a Fluentd plugin that parses and filters query strings in log messages and re-emits them.
30
+
31
+ ## Configuration
32
+
33
+ ```
34
+ <filter>
35
+ @type uri_parser
36
+ key_name uri
37
+ inject_key_prefix parsed
38
+ # hash_value_field parsed
39
+ # suppress_parse_error_log false
40
+ # ignore_key_not_exist false
41
+ # ignore_nil false
42
+
43
+ out_key_scheme scheme
44
+ out_key_host host
45
+ out_key_port port
46
+ out_key_path path
47
+ out_key_query query
48
+ out_key_fragment fragment
49
+ </filter>
50
+ # input string of data: {"uri": "http://example.com/path?foo=bar#t=1"}
51
+ # output data: {"parsed.scheme":"http","parsed.host":"example.com","parsed.port":80,"parsed.path":"/path","parsed.query":"foo=bar","parsed.fragment":"t=1"}
52
+
53
+ <filter>
54
+ @type query_string_parser
55
+ key_name parsed.query
56
+ hash_value_field query
57
+ # inject_key_prefix query
58
+ # suppress_parse_error_log false
59
+ # ignore_key_not_exist false
60
+ </filter>
61
+ # input string of data: {"parsed.query": "foo=bar"}
62
+ # output data: {"query":{"foo":"bar"}}
63
+
64
+ ```
65
+
66
+ **key_name (Required)**
67
+
68
+ Key of the value to be parsed in the record.
69
+
70
+ **hash_value_field (Default: '')**
71
+
72
+ If a value is set, the parsed values are stored as a hash under the key given by this value.
73
+
74
+ **inject_key_prefix (Default: '')**
75
+
76
+ If a value is set, it is prepended as a prefix to every key produced by parsing.
77
+
78
+ **suppress_parse_error_log (Default: false)**
79
+
80
+ If set to `true`, no error log is output even if parsing fails.
81
+
82
+ **ignore_key_not_exist (Default: false)**
83
+
84
+ If set to `true`, a record in which the field specified by `key_name` does not exist will not be emitted to the next stream. That means that the data will be lost there.
85
+
86
+ **ignore_nil (Default: false)**
87
+
88
+ If set to `true`, keys whose parsed value is nil are excluded from the result.
89
+
90
+ **multi_value_params (Default: false)**
91
+
92
+ If set to `true`, then resulting values would be arrays containing
93
+ potentially multiple values of a given parameter.
94
+
95
+ ## Development
96
+
97
+ After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
98
+
99
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
100
+
101
+ ## Contributing
102
+
103
+ Bug reports and pull requests are welcome on GitHub at https://github.com/daichirata/fluent-plugin-uri-cma-parser.
104
+
data/Rakefile ADDED
@@ -0,0 +1,10 @@
require "bundler/gem_tasks"
require 'rake/testtask'

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path, in verbose mode.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# Running plain `rake` executes the test suite.
task default: :test
@@ -0,0 +1,29 @@
# coding: utf-8
# Make the gem's lib/ directory loadable while the specification is evaluated.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)

Gem::Specification.new do |gem|
  # Identity
  gem.name    = "fluent-plugin-uri-cma-parser"
  gem.version = "0.1.0"
  gem.authors = ["Philippe BENAOUN"]
  gem.email   = ["philippe.benaoun@gmail.com"]
  gem.license = "Apache-2.0"

  # Description shown on the registry
  gem.summary     = "This is a Fluentd plugin to parse uri and query string in log messages."
  gem.description = "This is a Fluentd plugin to parse uri and query string in log messages."
  gem.homepage    = "https://gitlab.com/pbenaoun/fluent-plugin-uri-cma-parser"

  # Package every git-tracked file except tests; the listing is taken from the
  # directory that contains this gemspec, not from the current directory.
  tracked_files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0")
  end
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  # Runtime dependencies
  gem.add_runtime_dependency "fluentd", [">= 0.14.0", "< 2"]
  gem.add_runtime_dependency "addressable"

  # Development dependencies
  gem.add_development_dependency "test-unit"
  gem.add_development_dependency "bundler", "~> 2.0"
  gem.add_development_dependency "rake", "~> 10.0"
end
@@ -0,0 +1,79 @@
module Fluent
  module Plugin
    # Fluentd filter that takes a URL from one field of a record, decodes the
    # parameters of its query string and merges them back into the record.
    #
    # Only values that look like an http(s):// or www. URL are processed (see
    # #valid_url); anything else is logged and the record passes through
    # unchanged.
    class QueryStringParserFilter < Filter
      Fluent::Plugin.register_filter("query_string_parser", self)

      # Matches, on each line, everything up to and including the last "?".
      # Compiled once instead of on every call.
      QUERY_PREFIX_PATTERN = Regexp.new('^.*(\?)').freeze

      # Loose, unanchored pattern for "http://", "https://" or "www." URLs.
      URL_PATTERN = Regexp.new('(https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|www\.[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s]{2,}|https?:\/\/(?:www\.|(?!www))[a-zA-Z0-9]+\.[^\s]{2,}|www\.[a-zA-Z0-9]+\.[^\s]{2,})').freeze

      # NOTE: out_key_path and out_key_fragment are accepted so existing
      # configurations keep loading, but this filter only ever emits the
      # decoded query parameters; path and fragment are not added to the record.
      config_param :out_key_path, :string, default: 'path'
      # When unset, no query string is extracted and the record is unchanged.
      config_param :out_key_query, :string, default: 'params'
      config_param :out_key_fragment, :string, default: 'fragment'
      # Record key holding the URL to parse (required).
      config_param :key_name, :string
      # Key under which the decoded parameters are nested as a hash.
      config_param :hash_value_field, :string, default: 'indexing'
      # Optional prefix prepended to every decoded parameter name.
      config_param :inject_key_prefix, :string, default: nil
      # Silences the "parse failed" warning logged when parsing raises.
      config_param :suppress_parse_error_log, :bool, default: false
      # When true, records lacking key_name are dropped instead of passed on.
      config_param :ignore_key_not_exist, :bool, default: false
      # When true, records lacking key_name are also sent to the error stream.
      config_param :emit_invalid_record_to_error, :bool, default: true
      # When true, every parameter value is an array of all its occurrences.
      config_param :multi_value_params, :bool, default: false

      def initialize
        super
        require "addressable/uri"
      end

      # Removes, on every line of +line+, everything up to and including the
      # last "?" and strips surrounding whitespace from the result.
      #
      # @param line [String]
      # @return [String]
      def cleaner_prefix(line)
        line.gsub(QUERY_PREFIX_PATTERN, '').strip
      end

      # Tells whether +line+ contains something matching URL_PATTERN.
      #
      # @param line [String]
      # @return [Boolean]
      def valid_url(line)
        line.match(URL_PATTERN) ? true : false
      end

      # Decodes the query parameters of the URL stored under key_name and
      # merges them into the record.
      #
      # @param tag [String] event tag
      # @param time [Object] event time, forwarded untouched to the error stream
      # @param record [Hash] event record
      # @return [Hash, nil] the (possibly enriched) record, or nil when the
      #   key is missing and ignore_key_not_exist is enabled
      def filter(tag, time, record)
        raw_value = record[@key_name]

        if raw_value.nil?
          if @emit_invalid_record_to_error
            router.emit_error_event(tag, time, record, ArgumentError.new("#{@key_name} does not exist"))
          end
          return @ignore_key_not_exist ? nil : record
        end

        begin
          if valid_url(raw_value)
            params = extract_params(raw_value)
            record = record.merge(wrap_values(build_values(params))) unless params.empty?
          else
            log.warn "url not valid: #{raw_value}"
          end
        rescue => e
          log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
        end
        record
      end

      private

      # Returns the decoded [name, value] pairs of the URL's query string.
      # A URL without a query string yields an empty list, so the caller has
      # nothing to merge rather than failing on a nil value.
      def extract_params(raw_value)
        query = Addressable::URI.parse(raw_value).query if @out_key_query
        return [] if query.nil?

        Addressable::URI.form_unencode(cleaner_prefix(query))
      end

      # Turns [name, value] pairs into the hash to emit, honouring
      # multi_value_params and inject_key_prefix.
      def build_values(params)
        values =
          if @multi_value_params
            # A plain hash is built so no default proc leaks into the record.
            params.each_with_object({}) { |(name, value), acc| (acc[name] ||= []) << value }
          else
            Hash[params]
          end
        return values unless @inject_key_prefix

        Hash[values.map { |k, v| [@inject_key_prefix + k, v] }]
      end

      # Nests the values under hash_value_field when that option is set.
      def wrap_values(values)
        @hash_value_field ? { @hash_value_field => values } : values
      end
    end
  end
end
@@ -0,0 +1,63 @@
module Fluent
  module Plugin
    # Fluentd filter that parses the URI stored under key_name and merges the
    # requested components (scheme, host, port, path, query, fragment) into the
    # record. A component is emitted only when its out_key_* option is set.
    class URIParserFilter < Filter
      Fluent::Plugin.register_filter("uri_parser", self)

      # Record key holding the URI to parse (required).
      config_param :key_name, :string
      # When set, the parsed components are nested as a hash under this key.
      config_param :hash_value_field, :string, default: nil
      # When set, prepended to the name of every emitted component key.
      config_param :inject_key_prefix, :string, default: nil
      # Silences the warning logged when parsing raises.
      config_param :suppress_parse_error_log, :bool, default: false
      # When true, records lacking key_name are dropped instead of passed on.
      config_param :ignore_key_not_exist, :bool, default: false
      # When true, components whose value is nil are left out.
      config_param :ignore_nil, :bool, default: false
      # When true, records lacking key_name are also sent to the error stream.
      config_param :emit_invalid_record_to_error, :bool, default: true

      # Output key names, one per URI component.
      config_param :out_key_scheme, :string, default: nil
      config_param :out_key_host, :string, default: nil
      config_param :out_key_port, :string, default: nil
      config_param :out_key_path, :string, default: nil
      config_param :out_key_query, :string, default: nil
      config_param :out_key_fragment, :string, default: nil

      def initialize
        super
        require "addressable/uri"
      end

      # @param tag [String] event tag
      # @param time [Object] event time, forwarded untouched to the error stream
      # @param record [Hash] event record
      # @return [Hash, nil] the (possibly enriched) record, or nil when the
      #   key is missing and ignore_key_not_exist is enabled
      def filter(tag, time, record)
        raw_value = record[@key_name]

        if raw_value.nil?
          missing = ArgumentError.new("#{@key_name} does not exist")
          router.emit_error_event(tag, time, record, missing) if @emit_invalid_record_to_error
          return nil if @ignore_key_not_exist
          return record
        end

        begin
          uri = Addressable::URI.parse(raw_value)

          # Output key paired with the URI reader that supplies its value, in
          # the order the components are written to the result.
          components = [
            [@out_key_scheme,   :scheme],
            [@out_key_host,     :host],
            [@out_key_port,     :inferred_port],
            [@out_key_path,     :path],
            [@out_key_query,    :query],
            [@out_key_fragment, :fragment]
          ]

          values = {}
          components.each do |out_key, reader|
            values[out_key] = uri.public_send(reader) if out_key
          end
          values.delete_if { |_, value| value.nil? } if @ignore_nil

          unless values.empty?
            if @inject_key_prefix
              values = values.map { |name, value| [@inject_key_prefix + name, value] }.to_h
            end
            addition = @hash_value_field ? { @hash_value_field => values } : values
            record = record.merge(addition)
          end
        rescue => e
          log.warn "parse failed #{e.message}" unless @suppress_parse_error_log
        end

        record
      end
    end
  end
end
data/wercker.yml ADDED
@@ -0,0 +1,25 @@
1
+ box: ruby:2.3.0
2
+ # Build definition
3
+ build:
4
+ steps:
5
+ - script:
6
+ name: update bundler
7
+ code: gem update bundler
8
+
9
+ # A step that executes `bundle install` command
10
+ - bundle-install:
11
+ jobs: 4
12
+
13
+ # A custom script step, name value is used in the UI
14
+ # and the code value contains the command that get executed
15
+ - script:
16
+ name: echo ruby information
17
+ code: |
18
+ echo "ruby version $(ruby --version) running"
19
+ echo "from location $(which ruby)"
20
+ echo -p "gem list: $(gem list)"
21
+
22
+ # Add more steps here:
23
+ - script:
24
+ name: test
25
+ code: bundle exec rake test
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-uri-cma-parser
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Philippe BENAOUN
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-03-15 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: fluentd
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 0.14.0
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '2'
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: 0.14.0
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '2'
33
+ - !ruby/object:Gem::Dependency
34
+ name: addressable
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ - !ruby/object:Gem::Dependency
48
+ name: test-unit
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ - !ruby/object:Gem::Dependency
62
+ name: bundler
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '2.0'
68
+ type: :development
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.0'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rake
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '10.0'
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '10.0'
89
+ description: This is a Fluentd plugin to parse uri and query string in log messages.
90
+ email:
91
+ - philippe.benaoun@gmail.com
92
+ executables: []
93
+ extensions: []
94
+ extra_rdoc_files: []
95
+ files:
96
+ - ".gitignore"
97
+ - Gemfile
98
+ - README.md
99
+ - Rakefile
100
+ - fluent-plugin-uri-parser.gemspec
101
+ - lib/fluent/plugin/filter_query_string_parser.rb
102
+ - lib/fluent/plugin/filter_uri_parser.rb
103
+ - wercker.yml
104
+ homepage: https://gitlab.com/pbenaoun/fluent-plugin-uri-cma-parser
105
+ licenses:
106
+ - Apache-2.0
107
+ metadata: {}
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubygems_version: 3.1.6
124
+ signing_key:
125
+ specification_version: 4
126
+ summary: This is a Fluentd plugin to parse uri and query string in log messages.
127
+ test_files: []