fluent-plugin-webhdfs 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +1 -1
- data/README.md +12 -0
- data/fluent-plugin-webhdfs.gemspec +1 -1
- data/lib/fluent/plugin/out_webhdfs.rb +47 -3
- data/test/plugin/test_out_webhdfs.rb +30 -0
- metadata +2 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 5e43e061acd7552d5a0e482bf7ced1e319d1d107
         | 
| 4 | 
            +
              data.tar.gz: 934ac37c1a6d985261ae573ce0724546481269ff
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: be9c06348692d4f0506f0e7d8caa1fc4d6bfa4d683c1fdb0daa3fd87d4657ddbebaff4380c3fc03405030836cabc2fb856e02754aa8bccff1ddd5f7c7b49980f
         | 
| 7 | 
            +
              data.tar.gz: fee1d87a57a64ea65ca14a2f3f7765eb115bd362d504f0ccade232132a384b4d8f599ccd264c8cdcb2f326c4a8062b7c5e456e4cf11c8cc7fab5d78dadf09e3a
         | 
    
        data/.travis.yml
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -120,6 +120,18 @@ With kerberos authentication: | |
| 120 120 | 
             
                  kerberos true
         | 
| 121 121 | 
             
                </match>
         | 
| 122 122 |  | 
| 123 | 
            +
            If you want to compress data before storing it:
         | 
| 124 | 
            +
             | 
| 125 | 
            +
                <match access.**>
         | 
| 126 | 
            +
                  type webhdfs
         | 
| 127 | 
            +
                  host namenode.your.cluster.local
         | 
| 128 | 
            +
                  port 50070
         | 
| 129 | 
            +
                  path /path/on/hdfs/access.log.%Y%m%d_%H
         | 
| 130 | 
            +
                  compress gzip  # currently only support gzip
         | 
| 131 | 
            +
                </match>
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            Note that if you set `compress gzip`, then the suffix `.gz` will be added to path.
         | 
| 134 | 
            +
             | 
| 123 135 | 
             
            ### Namenode HA / Auto retry for WebHDFS known errors
         | 
| 124 136 |  | 
| 125 137 | 
             
            `fluent-plugin-webhdfs` (v0.2.0 or later) accepts 2 namenodes for Namenode HA (active/standby). Use `standby_namenode` like this:
         | 
| @@ -2,7 +2,7 @@ | |
| 2 2 |  | 
| 3 3 | 
             
            Gem::Specification.new do |gem|
         | 
| 4 4 | 
             
              gem.name          = "fluent-plugin-webhdfs"
         | 
| 5 | 
            -
              gem.version       = "0.4.0"
         | 
| 5 | 
            +
              gem.version       = "0.4.1"
         | 
| 6 6 | 
             
              gem.authors       = ["TAGOMORI Satoshi"]
         | 
| 7 7 | 
             
              gem.email         = ["tagomoris@gmail.com"]
         | 
| 8 8 | 
             
              gem.summary       = %q{Fluentd plugin to write data on HDFS over WebHDFS, with flexible formatting}
         | 
| @@ -50,12 +50,20 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput | |
| 50 50 | 
             
                when 'peer'
         | 
| 51 51 | 
             
                  :peer
         | 
| 52 52 | 
             
                else
         | 
| 53 | 
            -
                  raise ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
         | 
| 53 | 
            +
                  raise Fluent::ConfigError, "unexpected parameter on ssl_verify_mode: #{val}"
         | 
| 54 54 | 
             
                end
         | 
| 55 55 | 
             
              end
         | 
| 56 56 |  | 
| 57 57 | 
             
              config_param :kerberos, :bool, :default => false
         | 
| 58 58 |  | 
| 59 | 
            +
              SUPPORTED_COMPRESS = ['gzip']
         | 
| 60 | 
            +
              config_param :compress, :default => nil do |val|
         | 
| 61 | 
            +
                unless SUPPORTED_COMPRESS.include? val
         | 
| 62 | 
            +
                  raise Fluent::ConfigError, "unsupported compress: #{val}"
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
                val
         | 
| 65 | 
            +
              end
         | 
| 66 | 
            +
             | 
| 59 67 | 
             
              CHUNK_ID_PLACE_HOLDER = '${chunk_id}'
         | 
| 60 68 |  | 
| 61 69 | 
             
              def initialize
         | 
| @@ -216,16 +224,52 @@ class Fluent::WebHDFSOutput < Fluent::TimeSlicedOutput | |
| 216 224 | 
             
                end
         | 
| 217 225 | 
             
              end
         | 
| 218 226 |  | 
| 219 | 
            -
              def write(chunk)
         | 
| 227 | 
            +
              def generate_path(chunk)
         | 
| 220 228 | 
             
                hdfs_path = if @append
         | 
| 221 229 | 
             
                              path_format(chunk.key)
         | 
| 222 230 | 
             
                            else
         | 
| 223 231 | 
             
                              path_format(chunk.key).gsub(CHUNK_ID_PLACE_HOLDER, chunk_unique_id_to_str(chunk.unique_id))
         | 
| 224 232 | 
             
                            end
         | 
| 233 | 
            +
                if @compress
         | 
| 234 | 
            +
                  case @compress
         | 
| 235 | 
            +
                  when 'gzip'
         | 
| 236 | 
            +
                    hdfs_path = "#{hdfs_path}.gz"
         | 
| 237 | 
            +
                  end
         | 
| 238 | 
            +
                end
         | 
| 239 | 
            +
                hdfs_path
         | 
| 240 | 
            +
              end
         | 
| 241 | 
            +
             | 
| 242 | 
            +
              def compress_context(chunk, &block)
         | 
| 243 | 
            +
                case @compress
         | 
| 244 | 
            +
                when 'gzip'
         | 
| 245 | 
            +
                  require 'zlib'
         | 
| 246 | 
            +
                  require 'tempfile'
         | 
| 247 | 
            +
                  tmp = Tempfile.new("webhdfs-")
         | 
| 248 | 
            +
                  begin
         | 
| 249 | 
            +
                    w = Zlib::GzipWriter.new(tmp)
         | 
| 250 | 
            +
                    chunk.write_to(w)
         | 
| 251 | 
            +
                    w.close
         | 
| 252 | 
            +
                    tmp.close
         | 
| 253 | 
            +
                    tmp.open
         | 
| 254 | 
            +
                    yield tmp
         | 
| 255 | 
            +
                  ensure
         | 
| 256 | 
            +
                    tmp.close(true) rescue nil
         | 
| 257 | 
            +
                  end
         | 
| 258 | 
            +
                end
         | 
| 259 | 
            +
              end
         | 
| 260 | 
            +
             | 
| 261 | 
            +
              def write(chunk)
         | 
| 262 | 
            +
                hdfs_path = generate_path(chunk)
         | 
| 225 263 |  | 
| 226 264 | 
             
                failovered = false
         | 
| 227 265 | 
             
                begin
         | 
| 228 | 
            -
                  send_data(hdfs_path, chunk.read)
         | 
| 266 | 
            +
                  if @compress
         | 
| 267 | 
            +
                    compress_context(chunk) do |data|
         | 
| 268 | 
            +
                      send_data(hdfs_path, data)
         | 
| 269 | 
            +
                    end
         | 
| 270 | 
            +
                  else
         | 
| 271 | 
            +
                    send_data(hdfs_path, chunk.read)
         | 
| 272 | 
            +
                  end
         | 
| 229 273 | 
             
                rescue => e
         | 
| 230 274 | 
             
                  log.warn "failed to communicate hdfs cluster, path: #{hdfs_path}"
         | 
| 231 275 |  | 
| @@ -57,6 +57,17 @@ kerberos true | |
| 57 57 | 
             
                assert_equal '/path/to/ca_file.pem', d.instance.ssl_ca_file
         | 
| 58 58 | 
             
                assert_equal :peer, d.instance.ssl_verify_mode
         | 
| 59 59 | 
             
                assert_equal true, d.instance.kerberos
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                d = create_driver %[
         | 
| 62 | 
            +
            namenode server.local:14000
         | 
| 63 | 
            +
            path /hdfs/path/file.%Y%m%d.%H%M.log
         | 
| 64 | 
            +
            compress gzip
         | 
| 65 | 
            +
            ]
         | 
| 66 | 
            +
                assert_equal 'server.local', d.instance.instance_eval{ @namenode_host }
         | 
| 67 | 
            +
                assert_equal 14000, d.instance.instance_eval{ @namenode_port }
         | 
| 68 | 
            +
                assert_equal '/hdfs/path/file.%Y%m%d.%H%M.log', d.instance.path
         | 
| 69 | 
            +
                assert_equal '%Y%m%d%H%M', d.instance.time_slice_format
         | 
| 70 | 
            +
                assert_equal 'gzip', d.instance.compress
         | 
| 60 71 | 
             
              end
         | 
| 61 72 |  | 
| 62 73 | 
             
              def test_configure_placeholders
         | 
| @@ -90,4 +101,23 @@ path /hdfs/path/file.%Y%m%d.%H%M.log | |
| 90 101 | 
             
                      ]
         | 
| 91 102 | 
             
                end
         | 
| 92 103 | 
             
              end
         | 
| 104 | 
            +
             | 
| 105 | 
            +
              def test_invalid_configure
         | 
| 106 | 
            +
                assert_raise Fluent::ConfigError do
         | 
| 107 | 
            +
                  create_driver %[
         | 
| 108 | 
            +
                    namenode server.local:14000
         | 
| 109 | 
            +
                    path /hdfs/path/file.%Y%m%d.%H%M.log
         | 
| 110 | 
            +
                    ssl true
         | 
| 111 | 
            +
                    ssl_verify_mode invalid
         | 
| 112 | 
            +
                  ]
         | 
| 113 | 
            +
                end
         | 
| 114 | 
            +
                assert_raise Fluent::ConfigError do
         | 
| 115 | 
            +
                  create_driver %[
         | 
| 116 | 
            +
                    namenode server.local:14000
         | 
| 117 | 
            +
                    path /hdfs/path/file.%Y%m%d.%H%M.log
         | 
| 118 | 
            +
                    compress invalid
         | 
| 119 | 
            +
                  ]
         | 
| 120 | 
            +
                end
         | 
| 121 | 
            +
              end
         | 
| 122 | 
            +
             | 
| 93 123 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: fluent-plugin-webhdfs
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.4.0
         | 
| 4 | 
            +
              version: 0.4.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - TAGOMORI Satoshi
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2014-10 | 
| 11 | 
            +
            date: 2014-11-10 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: rake
         |