fluent-plugin-td 0.10.25 → 0.10.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a1db60dd2b0ce33a5752790d6f586c86ecf5d5d1
4
- data.tar.gz: 860714c30461e7bd0904b91355eba1c35571e212
3
+ metadata.gz: a3b96e1e6043112fc55402699b53ff5874f91f15
4
+ data.tar.gz: 4a62e347a754636e4e44e633639b9fb4fac3fbbc
5
5
  SHA512:
6
- metadata.gz: 0b7a4e3c95a2180751bde7ae165f4e278aeb961907c16081fc6fba460b92841810dc1c3f5f1fe4b8b1a3c82e71bfc82558eb78121c0c98bc63bdf397549f4a40
7
- data.tar.gz: cc08c575896bc7eb8361ee283391ae71db9e1a6dc3b6820d0327098c2c0347ad0e977a0d5044c7ce428d1dfac0d9865c83aae7828535cbef70e9e1016243f662
6
+ metadata.gz: 4d1f260c0b0f30bd7fbcbbc41dd5ac9bfbb8f0efc42efc2691d10a08d699aaa1514a0d382c7327ea4ff134a3336d4df4230a32e056c86acaf1622da1bc01d5cf
7
+ data.tar.gz: bb9a87d4389f5eef5223842ab59b1ee054e2a6b8ddc06e74f54a75794671be9455da2493696aacc657039bbd2e2b148f6a69207a1b5cb8e975452ae631264b6d
data/ChangeLog CHANGED
@@ -1,3 +1,9 @@
1
+ Release 0.10.26 - 2015/03/03
2
+
3
+ * Add use_gzip_command option to reduce CPU usage
4
+ * Remove useless anonymizer feature
5
+
6
+
1
7
  Release 0.10.25 - 2015/02/06
2
8
 
3
9
  * Improve tempfile handling
@@ -21,4 +21,6 @@ Gem::Specification.new do |gem|
21
21
  gem.add_dependency "td-client", "~> 0.8.66"
22
22
  gem.add_development_dependency "rake", ">= 0.9.2"
23
23
  gem.add_development_dependency "webmock", "~> 1.16"
24
+ gem.add_development_dependency "test-unit", "~> 3.0.8"
25
+ gem.add_development_dependency "test-unit-rr", "~> 1.0.3"
24
26
  end
@@ -7,60 +7,6 @@ module Fluent
7
7
 
8
8
  IMPORT_SIZE_LIMIT = 32 * 1024 * 1024
9
9
 
10
- class Anonymizer
11
- include Configurable
12
- end
13
-
14
- class RawAnonymizer < Anonymizer
15
- def anonymize(obj)
16
- if obj.nil?
17
- nil
18
- elsif obj.is_a?(String)
19
- anonymize_raw obj
20
- elsif obj.is_a?(Numeric)
21
- anonymize_raw obj.to_s
22
- else
23
- # boolean, array, map
24
- anonymize_raw MessagePack.pack(obj)
25
- end
26
- end
27
- end
28
-
29
- class MD5Anonymizer < RawAnonymizer
30
- def anonymize_raw(raw)
31
- Digest::MD5.hexdigest(raw)
32
- end
33
- end
34
-
35
- class IPXORAnonymizer < RawAnonymizer
36
- config_param :xor_key, :string
37
-
38
- def configure(conf)
39
- super
40
-
41
- a1, a2, a3, a4 = @xor_key.split('.')
42
- @xor_keys = [a1.to_i, a2.to_i, a3.to_i, a4.to_i]
43
-
44
- if @xor_keys == [0, 0, 0, 0]
45
- raise ConfigError, "'xor_key' must be IPv4 address"
46
- end
47
- end
48
-
49
- def anonymize_raw(raw)
50
- m = /\A(\d+)\.(\d+)\.(\d+)\.(\d+)/.match(raw)
51
- return nil unless m
52
-
53
- k1, k2, k3, k4 = @xor_keys
54
-
55
- o1 = m[1].to_i ^ k1
56
- o2 = m[2].to_i ^ k2
57
- o3 = m[3].to_i ^ k3
58
- o4 = m[4].to_i ^ k4
59
-
60
- "#{o1}.#{o2}.#{o3}.#{o4}"
61
- end
62
- end
63
-
64
10
  # To support log_level option since Fluentd v0.10.43
65
11
  unless method_defined?(:log)
66
12
  define_method(:log) { $log }
@@ -68,6 +14,7 @@ module Fluent
68
14
 
69
15
  config_param :apikey, :string
70
16
  config_param :auto_create_table, :bool, :default => true
17
+ config_param :use_gzip_command, :bool, :default => false
71
18
 
72
19
  config_param :endpoint, :string, :default => TreasureData::API::NEW_DEFAULT_ENDPOINT
73
20
  config_param :use_ssl, :bool, :default => true
@@ -100,6 +47,16 @@ module Fluent
100
47
  def configure(conf)
101
48
  super
102
49
 
50
+ if @use_gzip_command
51
+ require 'open3'
52
+
53
+ begin
54
+ Open3.capture3("gzip -V")
55
+ rescue Errno::ENOENT
56
+ raise ConfigError, "'gzip' utility must be in PATH for use_gzip_command parameter"
57
+ end
58
+ end
59
+
103
60
  # overwrite default value of buffer_chunk_limit
104
61
  if !conf['buffer_chunk_limit']
105
62
  @buffer.buffer_chunk_limit = IMPORT_SIZE_LIMIT
@@ -117,32 +74,6 @@ module Fluent
117
74
  @key = "#{database}.#{table}"
118
75
  end
119
76
 
120
- @anonymizes = {}
121
- conf.elements.select { |e|
122
- e.name == 'anonymize'
123
- }.each { |e|
124
- key = e['key']
125
- method = e['method']
126
-
127
- case method
128
- when 'md5'
129
- scr = MD5Anonymizer.new
130
- when 'ip_xor'
131
- scr = IPXORAnonymizer.new
132
- else
133
- raise ConfigError, "Unknown anonymize method: #{method}"
134
- end
135
-
136
- scr.configure(e)
137
-
138
- @anonymizes[key] = scr
139
- }
140
- if @anonymizes.empty?
141
- @anonymizes = nil
142
- else
143
- log.warn "<anonymize> feature is deprecated and will be removed. Use fluent-plugin-anonymizer instead."
144
- end
145
-
146
77
  @http_proxy = conf['http_proxy']
147
78
  end
148
79
 
@@ -190,14 +121,6 @@ module Fluent
190
121
  next unless record.is_a?(Hash)
191
122
 
192
123
  begin
193
- if @anonymizes
194
- @anonymizes.each_pair { |key, scr|
195
- if value = record[key]
196
- record[key] = scr.anonymize(value)
197
- end
198
- }
199
- end
200
-
201
124
  record['time'] = time
202
125
  record.delete(:time) if record.has_key?(:time)
203
126
 
@@ -248,21 +171,56 @@ module Fluent
248
171
 
249
172
  FileUtils.mkdir_p(@tmpdir) unless @tmpdir.nil?
250
173
  f = Tempfile.new("tdlog-#{chunk.key}-", @tmpdir)
251
- w = Zlib::GzipWriter.new(f)
252
174
 
253
- chunk.write_to(w)
254
- w.finish
255
- w = nil
256
-
257
- size = f.pos
175
+ size = if @use_gzip_command
176
+ gzip_by_command(chunk, f)
177
+ else
178
+ gzip_by_writer(chunk, f)
179
+ end
258
180
  f.pos = 0
259
181
  upload(database, table, f, size, unique_id)
260
-
261
182
  ensure
262
- w.close if w
263
183
  f.close(true) if f
264
184
  end
265
185
 
186
+ # TODO: Share this routine with s3 compressors
187
+ def gzip_by_command(chunk, tmp)
188
+ chunk_is_file = @buffer_type == 'file'
189
+ path = if chunk_is_file
190
+ chunk.path
191
+ else
192
+ w = Tempfile.new("gzip-tdlog-#{chunk.key}-", @tmpdir)
193
+ chunk.write_to(w)
194
+ w.close
195
+ w.path
196
+ end
197
+ res = system "gzip -c #{path} > #{tmp.path}"
198
+ unless res
199
+ log.warn "failed to execute gzip command. Fallback to GzipWriter. status = #{$?}"
200
+ begin
201
+ tmp.truncate(0)
202
+ return gzip_by_writer(chunk, tmp)
203
+ end
204
+ end
205
+ File.size(tmp.path)
206
+ ensure
207
+ unless chunk_is_file
208
+ w.close(true) rescue nil
209
+ end
210
+ end
211
+
212
+ def gzip_by_writer(chunk, tmp)
213
+ w = Zlib::GzipWriter.new(tmp)
214
+ chunk.write_to(w)
215
+ w.finish
216
+ w = nil
217
+ tmp.pos
218
+ ensure
219
+ if w
220
+ w.close rescue nil
221
+ end
222
+ end
223
+
266
224
  def upload(database, table, io, size, unique_id)
267
225
  unique_str = unique_id.unpack('C*').map { |x| "%02x" % x }.join
268
226
  log.trace { "uploading logs to Treasure Data database=#{database} table=#{table} (#{size}bytes)" }
@@ -1,5 +1,5 @@
1
1
  module Fluent
2
2
  module TreasureDataPlugin
3
- VERSION = '0.10.25'
3
+ VERSION = '0.10.26'
4
4
  end
5
5
  end
@@ -37,7 +37,7 @@ class TreasureDataLogOutputTest < Test::Unit::TestCase
37
37
  d = create_driver
38
38
 
39
39
  {:@apikey => 'testkey', :@use_ssl => true, :@auto_create_table => true,
40
- :@buffer_type => 'file', :@flush_interval => 300}.each { |k, v|
40
+ :@buffer_type => 'file', :@flush_interval => 300, :@use_gzip_command => false}.each { |k, v|
41
41
  assert_equal(d.instance.instance_variable_get(k), v)
42
42
  }
43
43
  end
@@ -48,11 +48,35 @@ class TreasureDataLogOutputTest < Test::Unit::TestCase
48
48
  database, table = d.instance.instance_variable_get(:@key).split(".", 2)
49
49
  stub_td_table_create_request(database, table)
50
50
  stub_td_import_request(stub_request_body(records, time), database, table)
51
+ assert_rr {
52
+ # mock(d.instance).gzip_by_writer(is_a(Fluent::BufferChunk), is_a(Tempfile)) causes empty request body so using dont_allow instead to check calling method
53
+ # We need actual gzipped content to verify compressed body is correct or not.
54
+ dont_allow(d.instance).gzip_by_command(is_a(Fluent::BufferChunk), is_a(Tempfile))
55
+
56
+ records.each { |record|
57
+ d.emit(record, time)
58
+ }
59
+ d.run
60
+ }
51
61
 
52
- records.each { |record|
53
- d.emit(record, time)
62
+ assert_equal('TD1 testkey', @auth_header)
63
+ end
64
+
65
+ def test_emit_with_gzip_command
66
+ d = create_driver(DEFAULT_CONFIG + "use_gzip_command true")
67
+ time, records = stub_seed_values
68
+ database, table = d.instance.instance_variable_get(:@key).split(".", 2)
69
+ stub_td_table_create_request(database, table)
70
+ stub_td_import_request(stub_request_body(records, time), database, table)
71
+ assert_rr {
72
+ # same as test_emit
73
+ dont_allow(d.instance).gzip_by_writer(is_a(Fluent::BufferChunk), is_a(Tempfile))
74
+
75
+ records.each { |record|
76
+ d.emit(record, time)
77
+ }
78
+ d.run
54
79
  }
55
- d.run
56
80
 
57
81
  assert_equal('TD1 testkey', @auth_header)
58
82
  end
data/test/test_helper.rb CHANGED
@@ -5,6 +5,7 @@ require 'webmock/test_unit'
5
5
  require 'stringio'
6
6
  require 'td-client'
7
7
  require 'zlib'
8
+ require 'test/unit/rr'
8
9
 
9
10
  def e(s)
10
11
  require 'cgi'
@@ -67,10 +68,10 @@ class Test::Unit::TestCase
67
68
  endpoint = opts[:endpoint] ? opts[:endpoint] : TreasureData::API::NEW_DEFAULT_IMPORT_ENDPOINT
68
69
 
69
70
  # for check_table_existence
70
- url_with_empty = "#{schema}://#{endpoint}//v3/table/import/#{e(db)}/#{e(table)}/#{format}"
71
+ url_with_empty = "#{schema}://#{endpoint}/v3/table/import/#{e(db)}/#{e(table)}/#{format}"
71
72
  stub_request(:put, url_with_empty).to_return(:status => 200, :body => response)
72
73
 
73
- url_with_unique = Regexp.compile("#{schema}://#{endpoint}//v3/table/import_with_id/#{e(db)}/#{e(table)}/.*/#{format}")
74
+ url_with_unique = Regexp.compile("#{schema}://#{endpoint}/v3/table/import_with_id/#{e(db)}/#{e(table)}/.*/#{format}")
74
75
  stub_request(:put, url_with_unique).with(:headers => {'Content-Type' => 'application/octet-stream'}) { |req|
75
76
  @auth_header = req.headers["Authorization"]
76
77
  stub_gzip_unwrap(req.body) == stub_gzip_unwrap(body)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-td
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.10.25
4
+ version: 0.10.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Treasure Data, Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-05 00:00:00.000000000 Z
11
+ date: 2015-03-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -72,6 +72,34 @@ dependencies:
72
72
  - - "~>"
73
73
  - !ruby/object:Gem::Version
74
74
  version: '1.16'
75
+ - !ruby/object:Gem::Dependency
76
+ name: test-unit
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: 3.0.8
82
+ type: :development
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: 3.0.8
89
+ - !ruby/object:Gem::Dependency
90
+ name: test-unit-rr
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: 1.0.3
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: 1.0.3
75
103
  description: Treasure Data Cloud Data Service plugin for Fluentd
76
104
  email: support@treasure-data.com
77
105
  executables: []