fluent-plugin-watch-process 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ vendor/*
data/.travis.yml ADDED
@@ -0,0 +1,6 @@
1
+ language: ruby
2
+
3
+ rvm:
4
+ - 2.1.0
5
+ - 2.0.0
6
+ - 1.9.3
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-watch-process.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,14 @@
1
+ Copyright (c) 2012- Kentaro Yoshida
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
14
+
data/README.md ADDED
@@ -0,0 +1,104 @@
1
+ fluent-plugin-watch-process [![Build Status](https://travis-ci.org/y-ken/fluent-plugin-watch-process.png?branch=master)](https://travis-ci.org/y-ken/fluent-plugin-watch-process)
2
+ =====================
3
+
4
+ ## Overview
5
+
6
+ Fluentd Input plugin to collect process information via ps command.
7
+
8
+ ## Use Cases
9
+
10
+ * collect cron/batch process for analysis.
11
+ * high cpu load time
12
+ * high usage of memory
13
+ * detect tasks that have been running too long
14
+
15
+ * output destination example
16
+ * Elasticsearch + Kibana to visualize statistics. Example: [example1.conf](https://github.com/y-ken/fluent-plugin-watch-process/blob/master/example1.conf)
17
+ * save process information as an audit log to AWS S3, with filenames separated by hostname. Example: [example2.conf](https://github.com/y-ken/fluent-plugin-watch-process/blob/master/example2.conf)
18
+
19
+ ## Installation
20
+
21
+ Install with the gem or fluent-gem command:
22
+
23
+ ```
24
+ # for fluentd
25
+ $ gem install fluent-plugin-watch-process
26
+
27
+ # for td-agent
28
+ $ sudo /usr/lib64/fluent/ruby/bin/fluent-gem install fluent-plugin-watch-process
29
+ ```
30
+
31
+ ## Configuration
32
+
33
+ ### Sample
34
+
35
+ This is a quick sample that outputs logs to `/var/log/td-agent/td-agent.log` with td-agent.
36
+
37
+ `````
38
+ <source>
39
+ type watch_process
40
+ tag debug.batch.${hostname} # Required
41
+ lookup_user batchuser # Optional
42
+ interval 10s # Optional (default: 5s)
43
+ </source>
44
+
45
+ <match debug.**>
46
+ type stdout
47
+ </match>
48
+ `````
49
+
50
+ After restarting td-agent, it will output process information to td-agent.log as shown below.
51
+
52
+ `````
53
+ $ tail -f /var/log/td-agent/td-agent.log
54
+ 2014-01-16 14:21:34 +0900 debug.batch.localhost: {"start_time":"2014-01-16 14:21:13 +0900","user":"td-agent","pid":17486,"parent_pid":17483,"cpu_time":"00:00:00","cpu_percent":1.5,"memory_percent":3.5,"mem_rss":36068,"mem_size":60708,"state":"S","proc_name":"ruby","command":"/usr/lib64/fluent/ruby/bin/ruby /usr/sbin/td-agent --group td-agent --log /var/log/td-agent/td-agent.log --daemon /var/run/td-agent/td-agent.pid","elapsed_time":21}
55
+ `````
56
+
57
+ ### Syntax
58
+
59
+ * tag # Required
60
+ * record output destination
61
+ * supported tag placeholders are `${hostname}` and `__HOSTNAME__`.
62
+
63
+ * command # Optional
64
+ * execute ps command with some options
65
+ * [default] Linux: `LANG=en_US.UTF-8 && ps -ewwo lstart,user:20,pid,ppid,time,%cpu,%mem,rss,sz,s,comm,cmd`
66
+ * [default] MacOSX: `LANG=en_US.UTF-8 && ps -ewwo lstart,user,pid,ppid,time,%cpu,%mem,rss,vsz,state,comm,command`
67
+
68
+ * keys # Optional
69
+ * output record keys of the ps command results
70
+ * [default] start_time user pid parent_pid cpu_time cpu_percent memory_percent mem_rss mem_size state proc_name command
71
+
72
+ * types # Optional
73
+ * settings for converting value types from string to integer/float.
74
+ * [default] pid:integer parent_pid:integer cpu_percent:float memory_percent:float mem_rss:integer mem_size:integer
75
+
76
+ * interval # Optional
77
+ * interval between executions of the ps command
78
+ * [default] 5s
79
+
80
+ * lookup_user # Optional
81
+ * filter by process owner username; separate multiple usernames with commas
82
+ * [default] N/A
83
+
84
+ * hostname_command # Optional
85
+ * command used to resolve the tag placeholders `${hostname}` and `__HOSTNAME__`. By default, it uses the long hostname.
86
+ * to use short hostname, set `hostname -s` for this option on linux/mac.
87
+ * [default] `hostname`
88
+
89
+ ## FAQ
90
+
91
+ * I need hostname key in the record.
92
+ To add a hostname key to the record, use this plugin together with fluent-plugin-record-reformer.
93
+
94
+ ## TODO
95
+
96
+ patches welcome!
97
+
98
+ ## Copyright
99
+
100
+ Copyright © 2013- Kentaro Yoshida (@yoshi_ken)
101
+
102
+ ## License
103
+
104
+ Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,10 @@
1
+ require "bundler/gem_tasks"
2
+ require "rake/testtask"
3
+ Rake::TestTask.new(:test) do |test|
4
+ test.libs << 'lib' << 'test'
5
+ test.pattern = 'test/**/test_*.rb'
6
+ test.verbose = true
7
+ end
8
+
9
+ task :default => :test
10
+
data/example1.conf ADDED
@@ -0,0 +1,41 @@
1
+ # This is a sample that stores batch process statistics in Elasticsearch for visualization with Kibana.
2
+
3
+ # This guide uses the following plugins.
4
+ # * fluent-plugin-watch-process
5
+ # * fluent-plugin-record-reformer
6
+ # * fluent-plugin-elasticsearch
7
+
8
+ <source>
9
+ type watch_process
10
+
11
+ # specify output tag
12
+ tag batch_process
13
+
14
+ # Filter processes owned by a specific user. If no filtering is needed, delete this line.
15
+ lookup_user batchuser
16
+
17
+ # interval between ps command executions
18
+ interval 10s
19
+ </source>
20
+
21
+ <match batch_process>
22
+ type record_reformer
23
+ output_tag reformed.${tag}
24
+ enable_ruby false
25
+ <record>
26
+ # add hostname key into record
27
+ hostname ${hostname}
28
+ </record>
29
+ </match>
30
+
31
+ <match reformed.*>
32
+ type elasticsearch
33
+ host localhost
34
+ port 9200
35
+ logstash_format true
36
+ logstash_prefix logstash
37
+ type_name batch
38
+
39
+ # write record interval
40
+ flush_interval 10sec
41
+ </match>
data/example2.conf ADDED
@@ -0,0 +1,34 @@
1
+ # This is a sample that saves process information as an audit log to AWS S3, with filenames separated by hostname.
2
+
3
+ # This guide uses the following plugins.
4
+ # * fluent-plugin-watch-process
5
+ # * fluent-plugin-s3
6
+
7
+ <source>
8
+ type watch_process
9
+
10
+ # use ${hostname} placeholder to use filename
11
+ tag batch_process.${hostname}
12
+
13
+ # Filter processes owned by a specific user. If no filtering is needed, delete this line.
14
+ lookup_user batchuser
15
+
16
+ # interval between ps command executions
17
+ interval 1s
18
+ </source>
19
+
20
+ <match batch_process.*>
21
+ type s3
22
+
23
+ aws_key_id YOUR_AWS_KEY_ID
24
+ aws_sec_key YOUR_AWS_SECRET/KEY
25
+ s3_bucket YOUR_S3_BUCKET_NAME
26
+ s3_endpoint s3-ap-northeast-1.amazonaws.com
27
+ s3_object_key_format %{path}%{time_slice}_%{index}.%{file_extension}
28
+ path barch_process_logs/
29
+ buffer_path /var/log/fluent/s3_batch_process
30
+
31
+ time_slice_format %Y%m%d-%H
32
+ time_slice_wait 10m
33
+ utc
34
+ </match>
@@ -0,0 +1,20 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
+ Gem::Specification.new do |s|
5
+ s.name = "fluent-plugin-watch-process"
6
+ s.version = "0.0.1"
7
+ s.authors = ["Kentaro Yoshida"]
8
+ s.email = ["y.ken.studio@gmail.com"]
9
+ s.homepage = "https://github.com/y-ken/fluent-plugin-watch-process"
10
+ s.summary = %q{Fluentd Input plugin to collect process information via ps command.}
11
+
12
+ s.files = `git ls-files`.split("\n")
13
+ s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
14
+ s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
15
+ s.require_paths = ["lib"]
16
+
17
+ # specify any dependencies:
18
+ s.add_development_dependency "rake"
19
+ s.add_runtime_dependency "fluentd"
20
+ end
@@ -0,0 +1,116 @@
1
+ module Fluent
2
+ class WatchProcessInput < Fluent::Input
3
+ Plugin.register_input('watch_process', self)
4
+
5
+ config_param :tag, :string
6
+ config_param :command, :string, :default => nil
7
+ config_param :keys, :string, :default => nil
8
+ config_param :types, :string, :default => nil
9
+ config_param :interval, :string, :default => '5s'
10
+ config_param :lookup_user, :string, :default => nil
11
+ config_param :hostname_command, :string, :default => 'hostname'
12
+
13
+ Converters = {
14
+ 'string' => lambda { |v| v.to_s },
15
+ 'integer' => lambda { |v| v.to_i },
16
+ 'float' => lambda { |v| v.to_f },
17
+ 'bool' => lambda { |v|
18
+ case v.downcase
19
+ when 'true', 'yes', '1'
20
+ true
21
+ else
22
+ false
23
+ end
24
+ },
25
+ 'time' => lambda { |v, time_parser|
26
+ time_parser.parse(v)
27
+ },
28
+ 'array' => lambda { |v, delimiter|
29
+ v.to_s.split(delimiter)
30
+ }
31
+ }
32
+
33
+ def initialize
34
+ super
35
+ require 'time'
36
+ end
37
+
38
+ def configure(conf)
39
+ super
40
+
41
+ @command = @command || get_ps_command
42
+ @keys = @keys || %w(start_time user pid parent_pid cpu_time cpu_percent memory_percent mem_rss mem_size state proc_name command)
43
+ types = @types || %w(pid:integer parent_pid:integer cpu_percent:float memory_percent:float mem_rss:integer mem_size:integer)
44
+ @types_map = Hash[types.map{|v| v.split(':')}]
45
+ @lookup_user = @lookup_user.gsub(' ', '').split(',') unless @lookup_user.nil?
46
+ @interval = Config.time_value(@interval)
47
+ @hostname = `#{@hostname_command}`.chomp
48
+ $log.info "watch_process: polling start. :tag=>#{@tag} :lookup_user=>#{@lookup_user} :interval=>#{@interval} :command=>#{@command}"
49
+ end
50
+
51
+ def start
52
+ @thread = Thread.new(&method(:run))
53
+ end
54
+
55
+ def shutdown
56
+ Thread.kill(@thread)
57
+ end
58
+
59
+ def run
60
+ loop do
61
+ io = IO.popen(@command, 'r')
62
+ io.gets
63
+ while result = io.gets
64
+ values = result.chomp.strip.split(/\s+/, @keys.size + 4)
65
+ time = Time.parse(values[0...5].join(' '))
66
+ data = Hash[
67
+ @keys.zip([time.to_s, values.values_at(5..15)].flatten).map do |k,v|
68
+ v = Converters[@types_map[k]].call(v) if @types_map.include?(k)
69
+ [k,v]
70
+ end
71
+ ]
72
+ data['elapsed_time'] = (Time.now - Time.parse(data['start_time'])).to_i
73
+ next unless @lookup_user.nil? || @lookup_user.include?(data['user'])
74
+ tag = @tag.gsub(/(\${[a-z]+}|__[A-Z]+__)/, get_placeholder)
75
+ Engine.emit(tag, Engine.now, data)
76
+ end
77
+ io.close
78
+ sleep @interval
79
+ end
80
+ end
81
+
82
+ def get_ps_command
83
+ if OS.linux?
84
+ "LANG=en_US.UTF-8 && ps -ewwo lstart,user:20,pid,ppid,time,%cpu,%mem,rss,sz,s,comm,cmd"
85
+ elsif OS.mac?
86
+ "LANG=en_US.UTF-8 && ps -ewwo lstart,user,pid,ppid,time,%cpu,%mem,rss,vsz,state,comm,command"
87
+ end
88
+ end
89
+
90
+ module OS
91
+ # ref. http://stackoverflow.com/questions/170956/how-can-i-find-which-operating-system-my-ruby-program-is-running-on
92
+ def OS.windows?
93
+ (/cygwin|mswin|mingw|bccwin|wince|emx/ =~ RUBY_PLATFORM) != nil
94
+ end
95
+
96
+ def OS.mac?
97
+ (/darwin/ =~ RUBY_PLATFORM) != nil
98
+ end
99
+
100
+ def OS.unix?
101
+ !OS.windows?
102
+ end
103
+
104
+ def OS.linux?
105
+ OS.unix? and not OS.mac?
106
+ end
107
+ end
108
+
109
+ def get_placeholder
110
+ return {
111
+ '__HOSTNAME__' => @hostname,
112
+ '${hostname}' => @hostname,
113
+ }
114
+ end
115
+ end
116
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'fluent/test'
15
+ unless ENV.has_key?('VERBOSE')
16
+ nulllogger = Object.new
17
+ nulllogger.instance_eval {|obj|
18
+ def method_missing(method, *args)
19
+ # pass
20
+ end
21
+ }
22
+ $log = nulllogger
23
+ end
24
+
25
+ require 'fluent/plugin/in_watch_process'
26
+
27
+ class Test::Unit::TestCase
28
+ end
@@ -0,0 +1,30 @@
1
+ require 'helper'
2
+
3
+ class WatchProcessInputTest < Test::Unit::TestCase
4
+ def setup
5
+ Fluent::Test.setup
6
+ end
7
+
8
+ CONFIG = %[
9
+ tag input.watch_process
10
+ lookup_user apache, mycron
11
+ ]
12
+
13
+ def create_driver(conf=CONFIG,tag='test')
14
+ Fluent::Test::OutputTestDriver.new(Fluent::WatchProcessInput, tag).configure(conf)
15
+ end
16
+
17
+ def test_configure
18
+ assert_raise(Fluent::ConfigError) {
19
+ d = create_driver('')
20
+ }
21
+ d = create_driver %[
22
+ tag input.watch_process
23
+ lookup_user apache, mycron
24
+ ]
25
+ d.instance.inspect
26
+ assert_equal 'input.watch_process', d.instance.tag
27
+ assert_equal ['apache', 'mycron'], d.instance.lookup_user
28
+ end
29
+ end
30
+
metadata ADDED
@@ -0,0 +1,91 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-watch-process
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Kentaro Yoshida
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2014-01-16 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :development
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ - !ruby/object:Gem::Dependency
31
+ name: fluentd
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ! '>='
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ description:
47
+ email:
48
+ - y.ken.studio@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - .gitignore
54
+ - .travis.yml
55
+ - Gemfile
56
+ - LICENSE.txt
57
+ - README.md
58
+ - Rakefile
59
+ - example1.conf
60
+ - example2.conf
61
+ - fluent-plugin-watch-process.gemspec
62
+ - lib/fluent/plugin/in_watch_process.rb
63
+ - test/helper.rb
64
+ - test/plugin/test_in_watch_process.rb
65
+ homepage: https://github.com/y-ken/fluent-plugin-watch-process
66
+ licenses: []
67
+ post_install_message:
68
+ rdoc_options: []
69
+ require_paths:
70
+ - lib
71
+ required_ruby_version: !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ! '>='
75
+ - !ruby/object:Gem::Version
76
+ version: '0'
77
+ required_rubygems_version: !ruby/object:Gem::Requirement
78
+ none: false
79
+ requirements:
80
+ - - ! '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ requirements: []
84
+ rubyforge_project:
85
+ rubygems_version: 1.8.23
86
+ signing_key:
87
+ specification_version: 3
88
+ summary: Fluentd Input plugin to collect process information via ps command.
89
+ test_files:
90
+ - test/helper.rb
91
+ - test/plugin/test_in_watch_process.rb