logstash-output-datahub 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -4
- data/lib/logstash/outputs/datahub.rb +357 -341
- data/logstash-output-datahub.gemspec +2 -2
- metadata +3 -9
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: e846682c134462b56c1ee792a34143797b6b34ce
         | 
| 4 | 
            +
              data.tar.gz: a36cd441f580ffd6763fc41cae69de4c7924f05b
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: eb309d8008b270ac7a8b485c57d48e2d85e3e548ec4d8d963fa4b318b7250b54f07a4cc26ce4ef67a18b80dda43ca4f589282765b2ab87de333093cc1189205c
         | 
| 7 | 
            +
              data.tar.gz: b48d2a40c2677e0c262383765970fe63396bff623bd94449f95b1ebf04dbc6a0a5a902af4ccca2c01ebabafad1d5e9bd38efec8344ffdf0a9e73320b85f7f09b
         | 
    
        data/README.md
    CHANGED
    
    | @@ -71,8 +71,6 @@ output { | |
| 71 71 | 
             
            		topic_name => ""
         | 
| 72 72 | 
             
            		#shard_id => "0"
         | 
| 73 73 | 
             
            		#shard_keys => ["thread_id"]
         | 
| 74 | 
            -
            		batch_size => 10
         | 
| 75 | 
            -
            		batch_timeout => 5
         | 
| 76 74 | 
             
            		dirty_data_continue => true
         | 
| 77 75 | 
             
            		dirty_data_file => "/Users/ph0ly/trash/dirty.data"
         | 
| 78 76 | 
             
            		dirty_data_file_max_size => 1000
         | 
| @@ -89,8 +87,6 @@ project_name(Required): datahub项目名称 | |
| 89 87 | 
             
            topic_name(Required): datahub topic名称
         | 
| 90 88 | 
             
            retry_times(Optional): 重试次数,-1为无限重试、0为不重试、>0表示需要有限次数
         | 
| 91 89 | 
             
            retry_interval(Optional): 下一次重试的间隔,单位为秒
         | 
| 92 | 
            -
            batch_size(Optional): 批量提交大小,指定数据积攒到@batch_size大小时触发一次提交,默认100
         | 
| 93 | 
            -
            batch_timeout(Optional): 批量提交超时,在数据量较少的情况下,数据超时后的超时提交,默认5秒
         | 
| 94 90 | 
             
            shard_keys(Optional):数组类型,数据落shard的字段名称,插件会根据这些字段的值计算hash将每条数据落某个shard, 注意shard_keys和shard_id都未指定,默认轮询落shard
         | 
| 95 91 | 
             
            shard_id(Optional): 所有数据落指定的shard,注意shard_keys和shard_id都未指定,默认轮询落shard
         | 
| 96 92 | 
             
            dirty_data_continue(Optional): 脏数据是否继续运行,默认为false,如果指定true,则遇到脏数据直接无视,继续处理数据。当开启该开关,必须指定@dirty_data_file文件
         | 
| @@ -1,341 +1,357 @@ | |
| 1 | 
            -
            #
         | 
| 2 | 
            -
            #Licensed to the Apache Software Foundation (ASF) under one
         | 
| 3 | 
            -
            #or more contributor license agreements.  See the NOTICE file
         | 
| 4 | 
            -
            #distributed with this work for additional information
         | 
| 5 | 
            -
            #regarding copyright ownership.  The ASF licenses this file
         | 
| 6 | 
            -
            #to you under the Apache License, Version 2.0 (the
         | 
| 7 | 
            -
            #"License"); you may not use this file except in compliance
         | 
| 8 | 
            -
            #with the License.  You may obtain a copy of the License at
         | 
| 9 | 
            -
            #
         | 
| 10 | 
            -
            #    http://www.apache.org/licenses/LICENSE-2.0
         | 
| 11 | 
            -
            #
         | 
| 12 | 
            -
            #Unless required by applicable law or agreed to in writing,
         | 
| 13 | 
            -
            #software distributed under the License is distributed on an
         | 
| 14 | 
            -
            #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 15 | 
            -
            #KIND, either express or implied.  See the License for the
         | 
| 16 | 
            -
            #specific language governing permissions and limitations
         | 
| 17 | 
            -
            #under the License.
         | 
| 18 | 
            -
            #
         | 
| 19 | 
            -
            require "logstash/outputs/base"
         | 
| 20 | 
            -
            require "logstash/namespace"
         | 
| 21 | 
            -
            require "logstash/environment"
         | 
| 22 | 
            -
            require "fileutils"
         | 
| 23 | 
            -
            require "thread"
         | 
| 24 | 
            -
             | 
| 25 | 
            -
            jar_path=File.expand_path(File.join(File.dirname(__FILE__), "../../.."))
         | 
| 26 | 
            -
            LogStash::Environment.load_runtime_jars! File.join(jar_path, "vendor")
         | 
| 27 | 
            -
             | 
| 28 | 
            -
            # Datahub output plugin
         | 
| 29 | 
            -
            class LogStash::Outputs::Datahub < LogStash::Outputs::Base
         | 
| 30 | 
            -
              declare_threadsafe!
         | 
| 31 | 
            -
             | 
| 32 | 
            -
              config_name "datahub"
         | 
| 33 | 
            -
             | 
| 34 | 
            -
              # datahub access id
         | 
| 35 | 
            -
              config :access_id, :validate => :string, :required => true
         | 
| 36 | 
            -
             | 
| 37 | 
            -
              # datahub access key
         | 
| 38 | 
            -
              config :access_key, :validate => :string, :required => true
         | 
| 39 | 
            -
             | 
| 40 | 
            -
              # datahub service endpoint
         | 
| 41 | 
            -
              config :endpoint, :validate => :string, :required => true
         | 
| 42 | 
            -
             | 
| 43 | 
            -
              # datahub project name
         | 
| 44 | 
            -
              config :project_name, :validate => :string, :required => true
         | 
| 45 | 
            -
             | 
| 46 | 
            -
              # datahub topic name
         | 
| 47 | 
            -
              config :topic_name, :validate => :string, :required => true
         | 
| 48 | 
            -
             | 
| 49 | 
            -
              # 重试次数,-1为无限重试、0为不重试、>0表示需要有限次数
         | 
| 50 | 
            -
              config :retry_times, :validate => :number, :required => false, :default => -1
         | 
| 51 | 
            -
             | 
| 52 | 
            -
              # 重试周期,下一次重试的间隔,单位为秒
         | 
| 53 | 
            -
              config :retry_interval, :validate => :number, :required => false, :default => 5
         | 
| 54 | 
            -
             | 
| 55 | 
            -
              # 按照指定字段的值计算hash,依据于该hash值落某个shard
         | 
| 56 | 
            -
              config :shard_keys, :validate => :array, :required => false, :default => []
         | 
| 57 | 
            -
             | 
| 58 | 
            -
              # 指定数据落指定的shard
         | 
| 59 | 
            -
              config :shard_id, :validate => :string, :required => false, :default => ""
         | 
| 60 | 
            -
             | 
| 61 | 
            -
              #   # 提交的列名,用户可以配置topic的列,采集部分列或者全部列
         | 
| 62 | 
            -
              #   # 默认为空数组,表示按照topic的顺序及全字段提交
         | 
| 63 | 
            -
              #   # 另外:列的配置不用保序,但是要求该字段在topic的schema中存在
         | 
| 64 | 
            -
              #   config :column_names, :validate => :array, :required => false, :default => []
         | 
| 65 | 
            -
             | 
| 66 | 
            -
              # 当出现脏数据时,是否继续写入
         | 
| 67 | 
            -
              # 当开启该开关,必须指定@dirty_data_file文件
         | 
| 68 | 
            -
              config :dirty_data_continue, :validate => :boolean, :required => false, :default => false
         | 
| 69 | 
            -
             | 
| 70 | 
            -
              # 脏数据文件名称,当数据文件名称,在@dirty_data_continue开启的情况下,需要指定该值
         | 
| 71 | 
            -
              # 特别注意:脏数据文件将被分割成两个部分.part1和.part2,part1作为更早的脏数据,part2作为更新的数据
         | 
| 72 | 
            -
              config :dirty_data_file, :validate => :string, :required => false
         | 
| 73 | 
            -
             | 
| 74 | 
            -
              # 脏数据文件的最大大小,该值保证脏数据文件最大大小不超过这个值,目前该值仅是一个参考值
         | 
| 75 | 
            -
              config :dirty_data_file_max_size, :validate => :number, :required => false, :default => 50024000
         | 
| 76 | 
            -
             | 
| 77 | 
            -
              # 数据传输压缩方式选择,目前支持deflate, lz4格式 
         | 
| 78 | 
            -
              config :compress_method, :validate => :string, :required => false, :default => ""
         | 
| 79 | 
            -
             | 
| 80 | 
            -
              # 该值内部使用,不提供配置
         | 
| 81 | 
            -
              # 分发shard的游标
         | 
| 82 | 
            -
              attr_accessor :shard_cursor
         | 
| 83 | 
            -
             | 
| 84 | 
            -
              # Shard cursor lock
         | 
| 85 | 
            -
              @@shard_lock = Mutex.new
         | 
| 86 | 
            -
             | 
| 87 | 
            -
              # 写文件锁
         | 
| 88 | 
            -
              @@file_lock = Mutex.new
         | 
| 89 | 
            -
             | 
| 90 | 
            -
              DatahubPackage = com.aliyun.datahub
         | 
| 91 | 
            -
             | 
| 92 | 
            -
              public
         | 
| 93 | 
            -
              def register
         | 
| 94 | 
            -
                begin
         | 
| 95 | 
            -
                  @account = DatahubPackage.auth.AliyunAccount::new(@access_id, @access_key)
         | 
| 96 | 
            -
                  @conf = DatahubPackage.DatahubConfiguration::new(@account, @endpoint)
         | 
| 97 | 
            -
                  if @compress_method == "deflate" || @compress_method == "lz4"
         | 
| 98 | 
            -
                    @compression_format = DatahubPackage.model.compress.CompressionFormat.fromValue(@compress_method)
         | 
| 99 | 
            -
                    @conf.setCompressionFormat(@compression_format)
         | 
| 100 | 
            -
                  end
         | 
| 101 | 
            -
             | 
| 102 | 
            -
                  @client = DatahubPackage.DatahubClient::new(@conf)
         | 
| 103 | 
            -
                  @project = DatahubPackage.wrapper.Project::Builder.build(@project_name, @client)
         | 
| 104 | 
            -
                  @topic = @project.getTopic(@topic_name)
         | 
| 105 | 
            -
                  @shard_cursor = 0
         | 
| 106 | 
            -
             | 
| 107 | 
            -
                  @shards = get_active_shards(@topic.listShard())
         | 
| 108 | 
            -
                  @shard_count = @shards.size()
         | 
| 109 | 
            -
             | 
| 110 | 
            -
                  result = @client.getTopic(@project_name, @topic_name)
         | 
| 111 | 
            -
                  @schema = result.getRecordSchema()
         | 
| 112 | 
            -
                  fields = @schema.getFields()
         | 
| 113 | 
            -
                  @columns_size = fields.size
         | 
| 114 | 
            -
                  @ | 
| 115 | 
            -
                  for i in 0...@columns_size
         | 
| 116 | 
            -
                    @ | 
| 117 | 
            -
                  end
         | 
| 118 | 
            -
             | 
| 119 | 
            -
                   | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
             | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 127 | 
            -
             | 
| 128 | 
            -
             | 
| 129 | 
            -
                   | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
                 | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
             | 
| 140 | 
            -
             | 
| 141 | 
            -
             | 
| 142 | 
            -
             | 
| 143 | 
            -
             | 
| 144 | 
            -
             | 
| 145 | 
            -
             | 
| 146 | 
            -
                     | 
| 147 | 
            -
                  end
         | 
| 148 | 
            -
             | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 152 | 
            -
             | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
                     | 
| 158 | 
            -
             | 
| 159 | 
            -
             | 
| 160 | 
            -
             | 
| 161 | 
            -
             | 
| 162 | 
            -
             | 
| 163 | 
            -
             | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
                 | 
| 167 | 
            -
             | 
| 168 | 
            -
             | 
| 169 | 
            -
             | 
| 170 | 
            -
             | 
| 171 | 
            -
             | 
| 172 | 
            -
               | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 175 | 
            -
             | 
| 176 | 
            -
             | 
| 177 | 
            -
             | 
| 178 | 
            -
             | 
| 179 | 
            -
                   | 
| 180 | 
            -
                    entry. | 
| 181 | 
            -
                  elsif field_type == DatahubPackage.common.data.FieldType:: | 
| 182 | 
            -
                    entry. | 
| 183 | 
            -
                  elsif field_type == DatahubPackage.common.data.FieldType:: | 
| 184 | 
            -
                    entry. | 
| 185 | 
            -
                   | 
| 186 | 
            -
                     | 
| 187 | 
            -
                   | 
| 188 | 
            -
             | 
| 189 | 
            -
             | 
| 190 | 
            -
             | 
| 191 | 
            -
                   | 
| 192 | 
            -
                   | 
| 193 | 
            -
             | 
| 194 | 
            -
             | 
| 195 | 
            -
             | 
| 196 | 
            -
             | 
| 197 | 
            -
             | 
| 198 | 
            -
                     | 
| 199 | 
            -
             | 
| 200 | 
            -
             | 
| 201 | 
            -
             | 
| 202 | 
            -
             | 
| 203 | 
            -
             | 
| 204 | 
            -
             | 
| 205 | 
            -
             | 
| 206 | 
            -
             | 
| 207 | 
            -
             | 
| 208 | 
            -
             | 
| 209 | 
            -
             | 
| 210 | 
            -
                 | 
| 211 | 
            -
             | 
| 212 | 
            -
             | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 217 | 
            -
             | 
| 218 | 
            -
                   | 
| 219 | 
            -
             | 
| 220 | 
            -
             | 
| 221 | 
            -
             | 
| 222 | 
            -
             | 
| 223 | 
            -
                 | 
| 224 | 
            -
             | 
| 225 | 
            -
             | 
| 226 | 
            -
             | 
| 227 | 
            -
             | 
| 228 | 
            -
             | 
| 229 | 
            -
             | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 234 | 
            -
                 | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
                   | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 242 | 
            -
                   | 
| 243 | 
            -
                   | 
| 244 | 
            -
             | 
| 245 | 
            -
             | 
| 246 | 
            -
             | 
| 247 | 
            -
             | 
| 248 | 
            -
             | 
| 249 | 
            -
                 | 
| 250 | 
            -
             | 
| 251 | 
            -
             | 
| 252 | 
            -
             | 
| 253 | 
            -
             | 
| 254 | 
            -
             | 
| 255 | 
            -
             | 
| 256 | 
            -
             | 
| 257 | 
            -
             | 
| 258 | 
            -
             | 
| 259 | 
            -
                     | 
| 260 | 
            -
             | 
| 261 | 
            -
                     | 
| 262 | 
            -
                     | 
| 263 | 
            -
             | 
| 264 | 
            -
             | 
| 265 | 
            -
                     | 
| 266 | 
            -
             | 
| 267 | 
            -
             | 
| 268 | 
            -
             | 
| 269 | 
            -
             | 
| 270 | 
            -
                     | 
| 271 | 
            -
             | 
| 272 | 
            -
             | 
| 273 | 
            -
                       | 
| 274 | 
            -
                       | 
| 275 | 
            -
             | 
| 276 | 
            -
                         | 
| 277 | 
            -
             | 
| 278 | 
            -
             | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 281 | 
            -
             | 
| 282 | 
            -
             | 
| 283 | 
            -
             | 
| 284 | 
            -
             | 
| 285 | 
            -
             | 
| 286 | 
            -
             | 
| 287 | 
            -
             | 
| 288 | 
            -
             | 
| 289 | 
            -
             | 
| 290 | 
            -
             | 
| 291 | 
            -
             | 
| 292 | 
            -
             | 
| 293 | 
            -
             | 
| 294 | 
            -
                       | 
| 295 | 
            -
                       | 
| 296 | 
            -
             | 
| 297 | 
            -
             | 
| 298 | 
            -
             | 
| 299 | 
            -
             | 
| 300 | 
            -
             | 
| 301 | 
            -
                   | 
| 302 | 
            -
             | 
| 303 | 
            -
             | 
| 304 | 
            -
             | 
| 305 | 
            -
             | 
| 306 | 
            -
             | 
| 307 | 
            -
             | 
| 308 | 
            -
             | 
| 309 | 
            -
             | 
| 310 | 
            -
             | 
| 311 | 
            -
             | 
| 312 | 
            -
             | 
| 313 | 
            -
             | 
| 314 | 
            -
             | 
| 315 | 
            -
                   | 
| 316 | 
            -
                   | 
| 317 | 
            -
             | 
| 318 | 
            -
             | 
| 319 | 
            -
             | 
| 320 | 
            -
             | 
| 321 | 
            -
             | 
| 322 | 
            -
             | 
| 323 | 
            -
                     | 
| 324 | 
            -
             | 
| 325 | 
            -
                     | 
| 326 | 
            -
             | 
| 327 | 
            -
                   | 
| 328 | 
            -
             | 
| 329 | 
            -
             | 
| 330 | 
            -
             | 
| 331 | 
            -
                   | 
| 332 | 
            -
             | 
| 333 | 
            -
             | 
| 334 | 
            -
             | 
| 335 | 
            -
                     | 
| 336 | 
            -
                    retry
         | 
| 337 | 
            -
             | 
| 338 | 
            -
             | 
| 339 | 
            -
             | 
| 340 | 
            -
             | 
| 341 | 
            -
             | 
| 1 | 
            +
            #
         | 
| 2 | 
            +
            #Licensed to the Apache Software Foundation (ASF) under one
         | 
| 3 | 
            +
            #or more contributor license agreements.  See the NOTICE file
         | 
| 4 | 
            +
            #distributed with this work for additional information
         | 
| 5 | 
            +
            #regarding copyright ownership.  The ASF licenses this file
         | 
| 6 | 
            +
            #to you under the Apache License, Version 2.0 (the
         | 
| 7 | 
            +
            #"License"); you may not use this file except in compliance
         | 
| 8 | 
            +
            #with the License.  You may obtain a copy of the License at
         | 
| 9 | 
            +
            #
         | 
| 10 | 
            +
            #    http://www.apache.org/licenses/LICENSE-2.0
         | 
| 11 | 
            +
            #
         | 
| 12 | 
            +
            #Unless required by applicable law or agreed to in writing,
         | 
| 13 | 
            +
            #software distributed under the License is distributed on an
         | 
| 14 | 
            +
            #"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
         | 
| 15 | 
            +
            #KIND, either express or implied.  See the License for the
         | 
| 16 | 
            +
            #specific language governing permissions and limitations
         | 
| 17 | 
            +
            #under the License.
         | 
| 18 | 
            +
            #
         | 
| 19 | 
            +
            require "logstash/outputs/base"
         | 
| 20 | 
            +
            require "logstash/namespace"
         | 
| 21 | 
            +
            require "logstash/environment"
         | 
| 22 | 
            +
            require "fileutils"
         | 
| 23 | 
            +
            require "thread"
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            jar_path=File.expand_path(File.join(File.dirname(__FILE__), "../../.."))
         | 
| 26 | 
            +
            LogStash::Environment.load_runtime_jars! File.join(jar_path, "vendor")
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            # Datahub output plugin
         | 
| 29 | 
            +
            class LogStash::Outputs::Datahub < LogStash::Outputs::Base
         | 
| 30 | 
            +
              declare_threadsafe!
         | 
| 31 | 
            +
             | 
| 32 | 
            +
              config_name "datahub"
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              # datahub access id
         | 
| 35 | 
            +
              config :access_id, :validate => :string, :required => true
         | 
| 36 | 
            +
             | 
| 37 | 
            +
              # datahub access key
         | 
| 38 | 
            +
              config :access_key, :validate => :string, :required => true
         | 
| 39 | 
            +
             | 
| 40 | 
            +
              # datahub service endpoint
         | 
| 41 | 
            +
              config :endpoint, :validate => :string, :required => true
         | 
| 42 | 
            +
             | 
| 43 | 
            +
              # datahub project name
         | 
| 44 | 
            +
              config :project_name, :validate => :string, :required => true
         | 
| 45 | 
            +
             | 
| 46 | 
            +
              # datahub topic name
         | 
| 47 | 
            +
              config :topic_name, :validate => :string, :required => true
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              # 重试次数,-1为无限重试、0为不重试、>0表示需要有限次数
         | 
| 50 | 
            +
              config :retry_times, :validate => :number, :required => false, :default => -1
         | 
| 51 | 
            +
             | 
| 52 | 
            +
              # 重试周期,下一次重试的间隔,单位为秒
         | 
| 53 | 
            +
              config :retry_interval, :validate => :number, :required => false, :default => 5
         | 
| 54 | 
            +
             | 
| 55 | 
            +
              # 按照指定字段的值计算hash,依据于该hash值落某个shard
         | 
| 56 | 
            +
              config :shard_keys, :validate => :array, :required => false, :default => []
         | 
| 57 | 
            +
             | 
| 58 | 
            +
              # 指定数据落指定的shard
         | 
| 59 | 
            +
              config :shard_id, :validate => :string, :required => false, :default => ""
         | 
| 60 | 
            +
             | 
| 61 | 
            +
              #   # 提交的列名,用户可以配置topic的列,采集部分列或者全部列
         | 
| 62 | 
            +
              #   # 默认为空数组,表示按照topic的顺序及全字段提交
         | 
| 63 | 
            +
              #   # 另外:列的配置不用保序,但是要求该字段在topic的schema中存在
         | 
| 64 | 
            +
              #   config :column_names, :validate => :array, :required => false, :default => []
         | 
| 65 | 
            +
             | 
| 66 | 
            +
              # 当出现脏数据时,是否继续写入
         | 
| 67 | 
            +
              # 当开启该开关,必须指定@dirty_data_file文件
         | 
| 68 | 
            +
              config :dirty_data_continue, :validate => :boolean, :required => false, :default => false
         | 
| 69 | 
            +
             | 
| 70 | 
            +
              # 脏数据文件名称,当数据文件名称,在@dirty_data_continue开启的情况下,需要指定该值
         | 
| 71 | 
            +
              # 特别注意:脏数据文件将被分割成两个部分.part1和.part2,part1作为更早的脏数据,part2作为更新的数据
         | 
| 72 | 
            +
              config :dirty_data_file, :validate => :string, :required => false
         | 
| 73 | 
            +
             | 
| 74 | 
            +
              # 脏数据文件的最大大小,该值保证脏数据文件最大大小不超过这个值,目前该值仅是一个参考值
         | 
| 75 | 
            +
              config :dirty_data_file_max_size, :validate => :number, :required => false, :default => 50024000
         | 
| 76 | 
            +
             | 
| 77 | 
            +
              # 数据传输压缩方式选择,目前支持deflate, lz4格式 
         | 
| 78 | 
            +
              config :compress_method, :validate => :string, :required => false, :default => ""
         | 
| 79 | 
            +
             | 
| 80 | 
            +
              # 该值内部使用,不提供配置
         | 
| 81 | 
            +
              # 分发shard的游标
         | 
| 82 | 
            +
              attr_accessor :shard_cursor
         | 
| 83 | 
            +
             | 
| 84 | 
            +
              # Shard cursor lock
         | 
| 85 | 
            +
              @@shard_lock = Mutex.new
         | 
| 86 | 
            +
             | 
| 87 | 
            +
              # 写文件锁
         | 
| 88 | 
            +
              @@file_lock = Mutex.new
         | 
| 89 | 
            +
             | 
| 90 | 
            +
              DatahubPackage = com.aliyun.datahub
         | 
| 91 | 
            +
             | 
| 92 | 
            +
              public
         | 
| 93 | 
            +
              def register
         | 
| 94 | 
            +
                begin
         | 
| 95 | 
            +
                  @account = DatahubPackage.auth.AliyunAccount::new(@access_id, @access_key)
         | 
| 96 | 
            +
                  @conf = DatahubPackage.DatahubConfiguration::new(@account, @endpoint)
         | 
| 97 | 
            +
                  if @compress_method == "deflate" || @compress_method == "lz4"
         | 
| 98 | 
            +
                    @compression_format = DatahubPackage.model.compress.CompressionFormat.fromValue(@compress_method)
         | 
| 99 | 
            +
                    @conf.setCompressionFormat(@compression_format)
         | 
| 100 | 
            +
                  end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                  @client = DatahubPackage.DatahubClient::new(@conf)
         | 
| 103 | 
            +
                  @project = DatahubPackage.wrapper.Project::Builder.build(@project_name, @client)
         | 
| 104 | 
            +
                  @topic = @project.getTopic(@topic_name)
         | 
| 105 | 
            +
                  @shard_cursor = 0
         | 
| 106 | 
            +
             | 
| 107 | 
            +
                  @shards = get_active_shards(@topic.listShard())
         | 
| 108 | 
            +
                  @shard_count = @shards.size()
         | 
| 109 | 
            +
             | 
| 110 | 
            +
                  result = @client.getTopic(@project_name, @topic_name)
         | 
| 111 | 
            +
                  @schema = result.getRecordSchema()
         | 
| 112 | 
            +
                  fields = @schema.getFields()
         | 
| 113 | 
            +
                  @columns_size = fields.size
         | 
| 114 | 
            +
                  @columnnames = []
         | 
| 115 | 
            +
                  for i in 0...@columns_size
         | 
| 116 | 
            +
                    @columnnames.push(fields[i].getName())
         | 
| 117 | 
            +
                  end
         | 
| 118 | 
            +
                  @columntypes = []
         | 
| 119 | 
            +
                  for i in 0...@columns_size
         | 
| 120 | 
            +
                    @columntypes.push(fields[i].getType())
         | 
| 121 | 
            +
                  end
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                  # 前置校验参数
         | 
| 124 | 
            +
                  check_params()
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                  if @shard_count == 0
         | 
| 127 | 
            +
                    @logger.error "No active shard available, please check"
         | 
| 128 | 
            +
                    raise "No active shard available, please check"
         | 
| 129 | 
            +
                  end
         | 
| 130 | 
            +
             | 
| 131 | 
            +
                  @logger.info "Init datahub success!"
         | 
| 132 | 
            +
                rescue => e
         | 
| 133 | 
            +
                  @logger.error "Init failed!"  + e.message + " " + e.backtrace.inspect.to_s
         | 
| 134 | 
            +
                  raise e
         | 
| 135 | 
            +
                end
         | 
| 136 | 
            +
              end # def register
         | 
| 137 | 
            +
             | 
| 138 | 
            +
              def check_params()
         | 
| 139 | 
            +
                # 如果shard_id配置了,则检查该shard是否ok
         | 
| 140 | 
            +
                if !@shard_id.empty?
         | 
| 141 | 
            +
                  valid = false
         | 
| 142 | 
            +
                  for i in 0...@shards.size
         | 
| 143 | 
            +
                    shard_entry = @shards[i]
         | 
| 144 | 
            +
                    if shard_entry.getShardId() == @shard_id && shard_entry.getState() == DatahubPackage.model.ShardState::ACTIVE
         | 
| 145 | 
            +
                      valid = true
         | 
| 146 | 
            +
                    end
         | 
| 147 | 
            +
                  end
         | 
| 148 | 
            +
                  if (!valid)
         | 
| 149 | 
            +
                    @logger.error "Config shard_id not exists or state not active, check your config"
         | 
| 150 | 
            +
                    raise "Config shard_id not exists or state not active, check your config"
         | 
| 151 | 
            +
                  end
         | 
| 152 | 
            +
                end
         | 
| 153 | 
            +
             | 
| 154 | 
            +
                # 检查shard_keys字段是否合法
         | 
| 155 | 
            +
                if @shard_keys.size > 0
         | 
| 156 | 
            +
                  for i in 0...@shard_keys.size
         | 
| 157 | 
            +
                    shard_key = @shard_keys[i]
         | 
| 158 | 
            +
                    if !@schema.containsField(shard_key)
         | 
| 159 | 
            +
                      @logger.error "Config shard_keys contains one or one more unknown field, check your config"
         | 
| 160 | 
            +
                      raise "Config shard_keys contains one or one more unknown field, check your config"
         | 
| 161 | 
            +
                    end
         | 
| 162 | 
            +
                  end
         | 
| 163 | 
            +
                end
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                # 配置了脏数据继续,必须指定脏数据文件
         | 
| 166 | 
            +
                if @dirty_data_continue
         | 
| 167 | 
            +
                  if @dirty_data_file.to_s.chomp.length == 0
         | 
| 168 | 
            +
                    raise "Dirty data file path can not be empty"
         | 
| 169 | 
            +
                  end
         | 
| 170 | 
            +
                end
         | 
| 171 | 
            +
             | 
| 172 | 
            +
              end
         | 
| 173 | 
            +
             | 
| 174 | 
            +
              # 检查并设置数据到entry中
         | 
| 175 | 
            +
              # 如果解析数据异常,则数据落脏数据文件
         | 
| 176 | 
            +
              def check_and_set_data(entry, field_type, index, event_map, column_name)
         | 
| 177 | 
            +
                data = event_map[column_name]
         | 
| 178 | 
            +
                begin
         | 
| 179 | 
            +
                  if field_type == DatahubPackage.common.data.FieldType::STRING
         | 
| 180 | 
            +
                    entry.setString(index, data.to_s)
         | 
| 181 | 
            +
                  elsif field_type == DatahubPackage.common.data.FieldType::BIGINT
         | 
| 182 | 
            +
                    entry.setBigint(index, java.lang.Long.parseLong(data.to_s))
         | 
| 183 | 
            +
                  elsif field_type == DatahubPackage.common.data.FieldType::DOUBLE
         | 
| 184 | 
            +
                    entry.setDouble(index, java.lang.Double.parseDouble(data.to_s))
         | 
| 185 | 
            +
                  elsif field_type == DatahubPackage.common.data.FieldType::BOOLEAN
         | 
| 186 | 
            +
                    entry.setBoolean(index, java.lang.Boolean.parseBoolean(data.to_s))
         | 
| 187 | 
            +
                  elsif field_type == DatahubPackage.common.data.FieldType::TIMESTAMP
         | 
| 188 | 
            +
                    entry.setTimeStamp(index, java.lang.Long.parseLong(data.to_s))
         | 
| 189 | 
            +
                  else
         | 
| 190 | 
            +
                    raise "Unknown schema type of data"
         | 
| 191 | 
            +
                  end
         | 
| 192 | 
            +
                  return true
         | 
| 193 | 
            +
                rescue => e
         | 
| 194 | 
            +
                  @logger.error "Parse data: " + column_name + "[" + data + "] failed, " + e.message
         | 
| 195 | 
            +
                  # 数据格式有异常,根据配置参数确定是否续跑
         | 
| 196 | 
            +
                  if !@dirty_data_continue
         | 
| 197 | 
            +
                    @logger.error "Dirty data found, exit process now."
         | 
| 198 | 
            +
                    puts "Dirty data found, exit process now."
         | 
| 199 | 
            +
                    Process.exit(1)
         | 
| 200 | 
            +
                    # 忽略的异常数据直接落文件
         | 
| 201 | 
            +
                  else
         | 
| 202 | 
            +
                    write_as_dirty_data(event_map)
         | 
| 203 | 
            +
                  end
         | 
| 204 | 
            +
                  return false
         | 
| 205 | 
            +
                end
         | 
| 206 | 
            +
              end
         | 
| 207 | 
            +
             | 
| 208 | 
            +
              # 脏数据文件处理
         | 
| 209 | 
            +
              def write_as_dirty_data(event_amp)
         | 
| 210 | 
            +
                dirty_file_part1_name = @dirty_data_file + ".part1"
         | 
| 211 | 
            +
                dirty_file_part2_name = @dirty_data_file + ".part2"
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                # 加锁写入
         | 
| 214 | 
            +
                @@file_lock.synchronize {
         | 
| 215 | 
            +
                  dirty_file_part2 = File.open(dirty_file_part2_name, "a+")
         | 
| 216 | 
            +
                  dirty_file_part2.puts(event_amp.to_s)
         | 
| 217 | 
            +
                  dirty_file_part2.close
         | 
| 218 | 
            +
                  if File.size(dirty_file_part2_name) > @dirty_data_file_max_size / 2
         | 
| 219 | 
            +
                    # .part1, .part2分别存储数据
         | 
| 220 | 
            +
                    # 旧数据落part1,新的数据落part2
         | 
| 221 | 
            +
                    FileUtils.mv(dirty_file_part2_name, dirty_file_part1_name)
         | 
| 222 | 
            +
                  end
         | 
| 223 | 
            +
                }
         | 
| 224 | 
            +
              end
         | 
| 225 | 
            +
             | 
| 226 | 
            +
              def get_active_shards(shards)
         | 
| 227 | 
            +
                active_shards = []
         | 
| 228 | 
            +
                for i in 0...shards.size
         | 
| 229 | 
            +
                  entry = shards.get(i)
         | 
| 230 | 
            +
                  if entry.getState() == DatahubPackage.model.ShardState::ACTIVE
         | 
| 231 | 
            +
                    active_shards.push(entry)
         | 
| 232 | 
            +
                  end
         | 
| 233 | 
            +
                end
         | 
| 234 | 
            +
                return active_shards
         | 
| 235 | 
            +
              end
         | 
| 236 | 
            +
             | 
| 237 | 
            +
              def get_next_shard_id()
         | 
| 238 | 
            +
                if !@shard_id.empty?
         | 
| 239 | 
            +
                  return @shard_id
         | 
| 240 | 
            +
                  # 否则轮询写入shard
         | 
| 241 | 
            +
                else
         | 
| 242 | 
            +
                  idx = 0
         | 
| 243 | 
            +
                  @@shard_lock.synchronize {
         | 
| 244 | 
            +
                    idx = @shard_cursor % @shard_count
         | 
| 245 | 
            +
                    @shard_cursor = idx + 1
         | 
| 246 | 
            +
                  }
         | 
| 247 | 
            +
                  shard_id = @shards[idx].getShardId()
         | 
| 248 | 
            +
                  return shard_id
         | 
| 249 | 
            +
                end
         | 
| 250 | 
            +
              end
         | 
| 251 | 
            +
             | 
| 252 | 
            +
  # Converts a batch of Logstash events into DataHub RecordEntry objects and
  # writes them with putRecords, retrying on failure according to
  # @retry_times / @retry_interval.
  #
  # NOTE(review): `retry` re-runs the whole begin block, so a partially
  # successful putRecords is re-sent from the original event_list — the
  # records that already succeeded may be written again (duplicates).
  # The `entries = put_result.getFailedRecords()` assignment below is
  # discarded by the retry for the same reason. Confirm this is acceptable
  # for the target topic before changing.
  def multi_receive(event_list)
    retry_count = 0
    begin
      entries = []
      shard_id = get_next_shard_id()

      event_list.each do |event|
        if event == LogStash::SHUTDOWN
          return
        end
        event_map = event.to_hash

        # One DataHub record per event, tagged with source metadata.
        entry = DatahubPackage.model.RecordEntry::new(@schema)
        entry.putAttribute("srcId", event_map["host"].to_s)
        entry.putAttribute("ts", event_map["@timestamp"].to_s)
        entry.putAttribute("version", event_map["@version"].to_s)
        entry.putAttribute("srcType", "log")

        # Copy each schema column present in the event into the record.
        # A single unparseable column drops the whole event (see
        # check_and_set_data for the dirty-data handling).
        is_data_valid = false
        for i in 0...@columns_size do
          column_name = @columnnames[i]
          column_type = @columntypes[i]
          value = event_map[column_name]
          if value != nil
            is_data_valid = check_and_set_data(entry, column_type, i, event_map, column_name)
            break if !is_data_valid
          end
        end

        if is_data_valid
          if @shard_keys.size > 0
            # Route by hash of the configured shard-key values.
            hash_string = ""
            for i in 0...@shard_keys.size
              shard_key = @shard_keys[i]
              if event_map[shard_key] != nil
                hash_string += event_map[shard_key].to_s + ","
              end
            end
            hashed_value = java.lang.String.new(hash_string).hashCode()
            entry.setPartitionKey(hashed_value)
          else
            # No shard keys: use the fixed/round-robin shard chosen above.
            entry.setShardId(shard_id)
          end
          entries.push(entry)
        end
      end

      # puts "total: " + entries.size.to_s

      # Only submit when the batch is non-empty.
      if entries.size > 0
        put_result = @client.putRecords(@project_name, @topic_name, entries)
        if put_result.getFailedRecordCount() > 0
          @logger.info "Put " + put_result.getFailedRecordCount().to_s + " records to datahub failed, total " + entries.size().to_s
          sleep @retry_interval
          entries = put_result.getFailedRecords()
          # Raise into the generic rescue below to drive the retry loop.
          raise "Write to datahub failed: " + entries.size.to_s
        else
          @logger.info "Put data to datahub success, total " + entries.size().to_s
        end
      end

    rescue DatahubPackage.exception.DatahubServiceException => e
      @logger.error "Flush data exception: " + e.message #+ " " + e.backtrace.inspect.to_s
      # Shard state changed on the service side: reload the active shards.
      if e.getErrorCode() == "InvalidShardOperation"
        @shards = get_active_shards(@topic.listShard())
        @shard_count = @shards.size()

        if @shard_count == 0
          @logger.error "No active shard available, please check"
        end
      elsif e.getErrorCode() == nil
        # No error code: presumably transient; back off before retrying.
        sleep @retry_interval
      end
      retry_count += 1
      @logger.warn "Now retry: " + retry_count.to_s
      retry
    rescue => e
      @logger.error "Flush data exception: " + e.message + " " + e.backtrace.inspect.to_s

      # Negative retry_times means retry forever.
      if @retry_times < 0
        retry_count += 1
        @logger.warn "Now retry: " + retry_count.to_s
        # puts "Now retry..."
        sleep @retry_interval
        retry
      elsif @retry_times == 0
        # Zero means fail fast: terminate the process on first error.
        @logger.error "Retry not work, now exit"
        Process.exit(1)
        # Positive: retry up to @retry_times, then exit.
      elsif @retry_times > 0
        retry_count += 1
        if retry_count > @retry_times
          @logger.warn "Retry over: " + @retry_times.to_s
          Process.exit(1)
        end
        @logger.warn "Now retry..."
        sleep @retry_interval
        retry
      end
    end
  end # def multi_receive
         | 
| 356 | 
            +
             | 
| 357 | 
            +
            end # class LogStash::Outputs::Datahub
         | 
| @@ -1,6 +1,6 @@ | |
| 1 1 | 
             
            Gem::Specification.new do |s|
         | 
| 2 2 | 
             
              s.name = 'logstash-output-datahub'
         | 
| 3 | 
            -
              s.version         = "1.0. | 
| 3 | 
            +
              s.version         = "1.0.1"
         | 
| 4 4 | 
             
              s.licenses = ["Apache License (2.0)"]
         | 
| 5 5 | 
             
              s.summary = "This aliyun-datahub output plugin."
         | 
| 6 6 | 
             
              s.description = "This gem is a logstash plugin required to be installed on top of the Logstash core pipeline using $LS_HOME/bin/plugin install gemname. This gem is not a stand-alone program"
         | 
| @@ -19,7 +19,7 @@ Gem::Specification.new do |s| | |
| 19 19 |  | 
| 20 20 | 
             
              # Gem dependencies
         | 
| 21 21 | 
             
              s.add_runtime_dependency 'stud'
         | 
| 22 | 
            -
              s.add_runtime_dependency "logstash-core", ">= 2.0.0" | 
| 22 | 
            +
              s.add_runtime_dependency "logstash-core", ">= 2.0.0"
         | 
| 23 23 | 
             
              s.add_runtime_dependency "logstash-codec-plain"
         | 
| 24 24 | 
             
              s.add_development_dependency "logstash-devutils"
         | 
| 25 25 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: logstash-output-datahub
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.0. | 
| 4 | 
            +
              version: 1.0.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Aliyun
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2017-06-14 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: stud
         | 
| @@ -31,9 +31,6 @@ dependencies: | |
| 31 31 | 
             
                - - ">="
         | 
| 32 32 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 33 | 
             
                    version: 2.0.0
         | 
| 34 | 
            -
                - - "<"
         | 
| 35 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 36 | 
            -
                    version: 3.0.0
         | 
| 37 34 | 
             
              type: :runtime
         | 
| 38 35 | 
             
              prerelease: false
         | 
| 39 36 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| @@ -41,9 +38,6 @@ dependencies: | |
| 41 38 | 
             
                - - ">="
         | 
| 42 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 43 40 | 
             
                    version: 2.0.0
         | 
| 44 | 
            -
                - - "<"
         | 
| 45 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 46 | 
            -
                    version: 3.0.0
         | 
| 47 41 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 48 42 | 
             
              name: logstash-codec-plain
         | 
| 49 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -126,7 +120,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 126 120 | 
             
                  version: '0'
         | 
| 127 121 | 
             
            requirements: []
         | 
| 128 122 | 
             
            rubyforge_project: 
         | 
| 129 | 
            -
            rubygems_version: 2. | 
| 123 | 
            +
            rubygems_version: 2.6.10
         | 
| 130 124 | 
             
            signing_key: 
         | 
| 131 125 | 
             
            specification_version: 4
         | 
| 132 126 | 
             
            summary: This aliyun-datahub output plugin.
         |