fluent-plugin-google-cloud-storage 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: dc04df508c4d34342b1bb5508122f8990f1a3649
4
+ data.tar.gz: 9b2f23328532dbcdef8683d5982373d7b0330651
5
+ SHA512:
6
+ metadata.gz: cab1aad9019bac99499d8960fb81d840941085dd15f7aa9f50031e4d95e5f674a62e7a6add2490650f20bfe588379a1718ae3de5609656e8ac329bbccbec3f16
7
+ data.tar.gz: 3f75fa7be0a8f8206c2d5ff72657eba7c7f4692e92386f1b3a77ab6b1ab2e53d0d0dede026bb6b8a14e4d017623d5951964d4f975a39f588212c441656157ae9
data/.gitignore ADDED
@@ -0,0 +1,34 @@
1
+ *.gem
2
+ *.rbc
3
+ /.config
4
+ /coverage/
5
+ /InstalledFiles
6
+ /pkg/
7
+ /spec/reports/
8
+ /test/tmp/
9
+ /test/version_tmp/
10
+ /tmp/
11
+
12
+ ## Specific to RubyMotion:
13
+ .dat*
14
+ .repl_history
15
+ build/
16
+
17
+ ## Documentation cache and generated files:
18
+ /.yardoc/
19
+ /_yardoc/
20
+ /doc/
21
+ /rdoc/
22
+
23
+ ## Environment normalisation:
24
+ /.bundle/
25
+ /lib/bundler/man/
26
+
27
+ # for a library or gem, you might want to ignore these files since the code is
28
+ # intended to run in multiple environments; otherwise, check them in:
29
+ Gemfile.lock
30
+ .ruby-version
31
+ .ruby-gemset
32
+
33
+ # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
34
+ .rvmrc
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in fluent-plugin-google-cloud-storage.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
1
+ Copyright (c) 2012- TAGOMORI Satoshi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,124 @@
1
+ # fluent-plugin-google-cloud-storage
2
+
3
+ [Fluentd](http://fluentd.org/) output plugin to write data into a [Google Cloud
4
+ Storage](https://cloud.google.com/storage/) bucket.
5
+
6
+ GoogleCloudStorageOutput slices data by time (in the specified unit) and stores
7
+ each slice as a plain text file. You can choose to:
8
+
9
+ * format whole data as serialized JSON, single attribute or separated multi attributes
10
+ * or LTSV, labeled-TSV (see http://ltsv.org/ )
11
+ * include time as line header, or not
12
+ * include tag as line header, or not
13
+ * change field separator (default: TAB)
14
+ * add new line as termination, or not
15
+
16
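+ You can specify the output path as 'path path/to/dir/access.%Y%m%d.${chunk_id}.log' and get objects such as 'path/to/dir/access.20120316.<chunk id>.log' in your GCS bucket. The path must contain the `${chunk_id}` placeholder; configuration fails without it.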
17
+
18
+ ## Configuration
19
+
20
+ ### GoogleCloudStorageOutput
21
+
22
+ To store data by time,tag,json (same with 'type file') in Google Cloud Storage:
23
+
24
+ <match access.**>
25
+ type google_cloud_storage
26
+ service_email SERVICE_ACCOUNT_EMAIL
27
+ service_pkcs12_path /path/to/key.p12
28
+ project_id name-of-project
29
+ bucket_id name-of-bucket
30
+ path path/to/access.%Y%m%d_%H.${chunk_id}.log
31
+ </match>
32
+
33
+ To specify the pkcs12 file's password, use `service_pkcs12_password`:
34
+
35
+ <match access.**>
36
+ type google_cloud_storage
37
+ service_email SERVICE_ACCOUNT_EMAIL
38
+ service_pkcs12_path /path/to/key.p12
39
+ service_pkcs12_password SECRET_PASSWORD
40
+ project_id name-of-project
41
+ bucket_id name-of-bucket
42
+ path path/to/access.%Y%m%d_%H.${chunk_id}.log
43
+ </match>
44
+
45
+ If you want JSON object only (without time or tag or both on header of lines), specify it by `output_include_time` or `output_include_tag` (default true):
46
+
47
+ <match access.**>
48
+ type google_cloud_storage
49
+ service_email SERVICE_ACCOUNT_EMAIL
50
+ service_pkcs12_path /path/to/key.p12
51
+ project_id name-of-project
52
+ bucket_id name-of-bucket
53
+ path path/to/access.%Y%m%d_%H.${chunk_id}.log
54
+ output_include_time false
55
+ output_include_tag false
56
+ </match>
57
+
58
+ To store data as LTSV without time and tag in Google Cloud Storage:
59
+
60
+ <match access.**>
61
+ type google_cloud_storage
62
+ # ...
63
+ output_data_type ltsv
64
+ </match>
65
+
66
+ Store data as TSV (TAB separated values) of specified keys, without time, with tag (removed prefix 'access'):
67
+
68
+ <match access.**>
69
+ type google_cloud_storage
70
+ # ...
71
+
72
+ field_separator TAB # or 'SPACE', 'COMMA' or 'SOH'(Start Of Heading: \001)
73
+ output_include_time false
74
+ output_include_tag true
75
+ remove_prefix access
76
+
77
+ output_data_type attr:path,status,referer,agent,bytes
78
+ </match>
79
+
80
+ If a message doesn't have a specified attribute, fluent-plugin-google-cloud-storage outputs 'NULL' instead of a value.
81
+
82
+ To store data compressed (gzip only now):
83
+
84
+ <match access.**>
85
+ type google_cloud_storage
86
+ # ...
87
+
88
+ compress gzip
89
+ </match>
90
+
91
+ ### Performance notifications
92
+
93
+ As GCS does not support appending to files, if you have multiple fluentd nodes,
94
+ you most likely want to log to multiple files. You can use '${hostname}' or
95
+ '${uuid:random}' placeholders in configuration for this purpose.
96
+
97
+ For hostname:
98
+
99
+ <match access.**>
100
+ type google_cloud_storage
101
+ # ...
102
+ path log/access/%Y%m%d/${hostname}.${chunk_id}.log
103
+ </match>
104
+
105
+ Or with random filename (to avoid duplicated file name only):
106
+
107
+ <match access.**>
108
+ type google_cloud_storage
109
+ # ...
110
+ path log/access/%Y%m%d/${uuid:random}.${chunk_id}.log
111
+ </match>
112
+
113
+ With configurations above, you can handle all of files of '/log/access/20120820/*' as specified timeslice access logs.
114
+
115
+ ## TODO
116
+
117
+ * docs?
118
+ * patches welcome!
119
+
120
+ ## Copyright
121
+
122
+ * Copyright (c) 2014- Hsiu-Fan Wang (hfwang@porkbuns.net)
123
+ * License
124
+ * Apache License, Version 2.0
data/Rakefile ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
3
+
4
+ require 'rake/testtask'
5
+ Rake::TestTask.new(:test) do |test|
6
+ test.libs << 'lib' << 'test'
7
+ test.pattern = 'test/**/test_*.rb'
8
+ test.verbose = true
9
+ end
10
+
11
+ task :default => :test
@@ -0,0 +1,23 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = "fluent-plugin-google-cloud-storage"
5
+ gem.version = "0.3.2"
6
+ gem.authors = ["Hsiu-Fan Wang"]
7
+ gem.email = ["hfwang@porkbuns.net"]
8
+ gem.summary = %q{Fluentd plugin to write data to Google Cloud Storage}
9
+ gem.description = %q{Google Cloud Storage fluentd output}
10
+ gem.homepage = "https://github.com/hfwang/fluent-plugin-google-cloud-storage"
11
+ gem.license = "APLv2"
12
+
13
+ gem.files = `git ls-files`.split($\)
14
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
15
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
16
+ gem.require_paths = ["lib"]
17
+
18
+ gem.add_development_dependency "rake"
19
+ gem.add_runtime_dependency "fluentd", '>= 0.10.53'
20
+ gem.add_runtime_dependency "fluent-mixin-plaintextformatter", '>= 0.2.1'
21
+ gem.add_runtime_dependency "fluent-mixin-config-placeholders", ">= 0.3.0"
22
+ gem.add_runtime_dependency "google-api-client", '~> 0.7'
23
+ end
@@ -0,0 +1,154 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require 'fluent/mixin/config_placeholders'
4
+ require 'fluent/mixin/plaintextformatter'
5
+
6
+ class Fluent::GoogleCloudStorageOutput < Fluent::TimeSlicedOutput
7
+ Fluent::Plugin.register_output('google_cloud_storage', self)
8
+
9
+ config_set_default :buffer_type, 'file'
10
+ config_set_default :time_slice_format, '%Y%m%d'
11
+
12
+ config_param :ignore_start_check_error, :bool, :default => false
13
+
14
+ include Fluent::Mixin::ConfigPlaceholders
15
+
16
+ config_param :service_email, :string
17
+ config_param :service_pkcs12_path, :string
18
+ config_param :service_pkcs12_password, :string, :default => "notasecret"
19
+ config_param :project_id, :string
20
+ config_param :bucket_id, :string
21
+ config_param :path, :string
22
+
23
+ config_param :compress, :default => nil do |val|
24
+ unless ["gz", "gzip"].include?(val)
25
+ raise ConfigError, "Unsupported compression algorithm '#{val}'"
26
+ end
27
+ val
28
+ end
29
+
30
+ # how many times of write failure before switch to standby namenode
31
+ # by default it's 11 times that costs 1023 seconds inside fluentd,
32
+ # which is considered enough to exclude the scenes that caused by temporary network fail or single datanode fail
33
+ config_param :failures_before_use_standby, :integer, :default => 11
34
+
35
+ include Fluent::Mixin::PlainTextFormatter
36
+
37
+ config_param :default_tag, :string, :default => 'tag_missing'
38
+
39
+ CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
40
+
41
+ def initialize
42
+ super
43
+ require 'zlib'
44
+ require 'net/http'
45
+ require 'time'
46
+ require 'google/api_client'
47
+ require 'signet/oauth_2/client'
48
+ end
49
+
50
+ # Define `log` method for v0.10.42 or earlier
51
+ unless method_defined?(:log)
52
+ define_method("log") { $log }
53
+ end
54
+
55
+ def call_google_api(params)
56
+ # refresh_auth
57
+ if @google_api_client.authorization.expired?
58
+ @google_api_client.authorization.fetch_access_token!
59
+ end
60
+ return @google_api_client.execute(params)
61
+ end
62
+
63
+ def configure(conf)
64
+ if conf['path']
65
+ if conf['path'].index('%S')
66
+ conf['time_slice_format'] = '%Y%m%d%H%M%S'
67
+ elsif conf['path'].index('%M')
68
+ conf['time_slice_format'] = '%Y%m%d%H%M'
69
+ elsif conf['path'].index('%H')
70
+ conf['time_slice_format'] = '%Y%m%d%H'
71
+ end
72
+ end
73
+
74
+ super
75
+
76
+ @client = prepare_client()
77
+
78
+ if @path.index(CHUNK_ID_PLACE_HOLDER).nil?
79
+ raise Fluent::ConfigError, "path must contain ${chunk_id}, which is the placeholder for chunk_id, when append is set to false."
80
+ end
81
+ end
82
+
83
+ def prepare_client
84
+ @google_api_client = Google::APIClient.new(
85
+ :application_name => "fluent-plugin-google-cloud-storage",
86
+ :application_version => "0.3.1")
87
+ begin
88
+ key = Google::APIClient::KeyUtils.load_from_pkcs12(
89
+ @service_pkcs12_path, @service_pks12_password)
90
+ @google_api_client.authorization = Signet::OAuth2::Client.new(
91
+ token_credential_uri: "https://accounts.google.com/o/oauth2/token",
92
+ audience: "https://accounts.google.com/o/oauth2/token",
93
+ issuer: @service_email,
94
+ scope: "https://www.googleapis.com/auth/devstorage.read_write",
95
+ signing_key: key)
96
+ @google_api_client.authorization.fetch_access_token!
97
+ rescue Signet::AuthorizationError
98
+ raise Fluent::ConfigError, "Error occurred authenticating with Google"
99
+ end
100
+ @storage_api = @google_api_client.discovered_api("storage", "v1")
101
+ return @google_api_client
102
+ end
103
+
104
+ def start
105
+ super
106
+ end
107
+
108
+ def shutdown
109
+ super
110
+ end
111
+
112
+ def path_format(chunk_key)
113
+ Time.strptime(chunk_key, @time_slice_format).strftime(@path)
114
+ end
115
+
116
+ def chunk_unique_id_to_str(unique_id)
117
+ unique_id.unpack('C*').map{|x| x.to_s(16).rjust(2,'0')}.join('')
118
+ end
119
+
120
+ def send_data(path, data)
121
+ content_type = "application/json"
122
+
123
+ io = nil
124
+ if ["gz", "gzip"].include?(@compress)
125
+ io = StringIO.new("")
126
+ writer = Zlib::GzipWriter.new(io)
127
+ writer.write(data)
128
+ writer.finish
129
+ io.rewind
130
+ else
131
+ io = StringIO.new(data)
132
+ end
133
+
134
+ media = Google::APIClient::UploadIO.new(io, content_type, File.basename(path))
135
+
136
+ call_google_api(api_method: @storage_api.objects.insert,
137
+ parameters: {
138
+ uploadType: "multipart",
139
+ project: @project_id,
140
+ bucket: @bucket_id,
141
+ name: path
142
+ },
143
+ body_object: { contentType: media.content_type },
144
+ media: media)
145
+ end
146
+
147
+ def write(chunk)
148
+ hdfs_path = path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
149
+
150
+ send_data(hdfs_path, chunk.read)
151
+
152
+ hdfs_path
153
+ end
154
+ end
data/test/helper.rb ADDED
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+
12
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
13
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
14
+ require 'fluent/test'
15
+ unless ENV.has_key?('VERBOSE')
16
+ nulllogger = Object.new
17
+ nulllogger.instance_eval {|obj|
18
+ def method_missing(method, *args)
19
+ # pass
20
+ end
21
+ }
22
+ $log = nulllogger
23
+ end
24
+
25
+ require 'fluent/plugin/out_google_cloud_storage'
26
+
27
+ class Test::Unit::TestCase
28
+ end
@@ -0,0 +1,76 @@
1
+ require 'helper'
2
+
3
+ class WebHDFSOutputTest < Test::Unit::TestCase
4
+ CONFIG = %[
5
+ host namenode.local
6
+ path /hdfs/path/file.%Y%m%d.log
7
+ ]
8
+
9
+ def create_driver(conf=CONFIG,tag='test')
10
+ Fluent::Test::OutputTestDriver.new(Fluent::WebHDFSOutput, tag).configure(conf)
11
+ end
12
+
13
+ def test_configure
14
+ d = create_driver
15
+ assert_equal 'namenode.local', d.instance.instance_eval{ @namenode_host }
16
+ assert_equal 50070, d.instance.instance_eval{ @namenode_port }
17
+ assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
18
+ assert_equal '%Y%m%d', d.instance.time_slice_format
19
+ assert_equal false, d.instance.httpfs
20
+ assert_nil d.instance.username
21
+ assert_equal false, d.instance.ignore_start_check_error
22
+
23
+ assert_equal true, d.instance.output_include_time
24
+ assert_equal true, d.instance.output_include_tag
25
+ assert_equal 'json', d.instance.output_data_type
26
+ assert_nil d.instance.remove_prefix
27
+ assert_equal 'TAB', d.instance.field_separator
28
+ assert_equal true, d.instance.add_newline
29
+ assert_equal 'tag_missing', d.instance.default_tag
30
+
31
+ d = create_driver %[
32
+ namenode server.local:14000
33
+ path /hdfs/path/file.%Y%m%d.%H%M.log
34
+ httpfs yes
35
+ username hdfs_user
36
+ ]
37
+ assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
38
+ assert_equal 14000, d.instance.instance_eval{ @namenode_port }
39
+ assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
40
+ assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
41
+ assert_equal true, d.instance.httpfs
42
+ assert_equal 'hdfs_user', d.instance.username
43
+ end
44
+
45
+ def test_configure_placeholders
46
+ d = create_driver %[
47
+ hostname testing.node.local
48
+ namenode server.local:50070
49
+ path /hdfs/${hostname}/file.%Y%m%d%H.log
50
+ ]
51
+ assert_equal '/hdfs/testing.node.local/file.%Y%m%d%H.log', d.instance.path
52
+ end
53
+
54
+ def test_path_format
55
+ d = create_driver
56
+ assert_equal '/hdfs/path/file.%Y%m%d.log', d.instance.path
57
+ assert_equal '%Y%m%d', d.instance.time_slice_format
58
+ assert_equal '/hdfs/path/file.20120718.log', d.instance.path_format('20120718')
59
+
60
+ d = create_driver %[
61
+ namenode server.local:14000
62
+ path /hdfs/path/file.%Y%m%d.%H%M.log
63
+ ]
64
+ assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
65
+ assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
66
+ assert_equal '/hdfs/path/file.20120718.1503.log', d.instance.path_format('201207181503')
67
+
68
+ assert_raise Fluent::ConfigError do
69
+ d = create_driver %[
70
+ namenode server.local:14000
71
+ path /hdfs/path/file.%Y%m%d.%H%M.log
72
+ append false
73
+ ]
74
+ end
75
+ end
76
+ end
metadata ADDED
@@ -0,0 +1,126 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fluent-plugin-google-cloud-storage
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ platform: ruby
6
+ authors:
7
+ - Hsiu-Fan Wang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-09-30 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fluentd
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.10.53
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.10.53
41
+ - !ruby/object:Gem::Dependency
42
+ name: fluent-mixin-plaintextformatter
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 0.2.1
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 0.2.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: fluent-mixin-config-placeholders
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: 0.3.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: google-api-client
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.7'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.7'
83
+ description: Google Cloud Storage fluentd output
84
+ email:
85
+ - hfwang@porkbuns.net
86
+ executables: []
87
+ extensions: []
88
+ extra_rdoc_files: []
89
+ files:
90
+ - ".gitignore"
91
+ - Gemfile
92
+ - Gemfile.lock
93
+ - LICENSE.txt
94
+ - README.md
95
+ - Rakefile
96
+ - fluent-plugin-google-cloud-storage.gemspec
97
+ - lib/fluent/plugin/out_google_cloud_storage.rb
98
+ - test/helper.rb
99
+ - test/plugin/test_out_google_cloud_storage.rb
100
+ homepage: https://github.com/hfwang/fluent-plugin-google-cloud-storage
101
+ licenses:
102
+ - APLv2
103
+ metadata: {}
104
+ post_install_message:
105
+ rdoc_options: []
106
+ require_paths:
107
+ - lib
108
+ required_ruby_version: !ruby/object:Gem::Requirement
109
+ requirements:
110
+ - - ">="
111
+ - !ruby/object:Gem::Version
112
+ version: '0'
113
+ required_rubygems_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ requirements: []
119
+ rubyforge_project:
120
+ rubygems_version: 2.2.2
121
+ signing_key:
122
+ specification_version: 4
123
+ summary: Fluentd plugin to write data to Google Cloud Storage
124
+ test_files:
125
+ - test/helper.rb
126
+ - test/plugin/test_out_google_cloud_storage.rb