td 0.10.38 → 0.10.39
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +5 -0
 - data/lib/td/command/acl.rb +1 -1
 - data/lib/td/command/bulk_import.rb +134 -4
 - data/lib/td/command/list.rb +2 -0
 - data/lib/td/file_reader.rb +324 -0
 - data/lib/td/version.rb +1 -1
 - metadata +3 -2
 
    
        data/ChangeLog
    CHANGED
    
    
    
        data/lib/td/command/acl.rb
    CHANGED
    
    | 
         @@ -35,7 +35,7 @@ module Command 
     | 
|
| 
       35 
35 
     | 
    
         | 
| 
       36 
36 
     | 
    
         
             
                client.grant_access_control(subject, action, scope, grant_option)
         
     | 
| 
       37 
37 
     | 
    
         | 
| 
       38 
     | 
    
         
            -
                $stderr.puts "Access control [#{subject} #{action} #{scope}] is created #{grant_option 
     | 
| 
      
 38 
     | 
    
         
            +
                $stderr.puts "Access control [#{subject} #{action} #{scope}] is created #{grant_option ? 'with' : 'without'} grant option."
         
     | 
| 
       39 
39 
     | 
    
         
             
              end
         
     | 
| 
       40 
40 
     | 
    
         | 
| 
       41 
41 
     | 
    
         
             
              def acl_revoke(op)
         
     | 
| 
         @@ -85,17 +85,48 @@ module Command 
     | 
|
| 
       85 
85 
     | 
    
         
             
              end
         
     | 
| 
       86 
86 
     | 
    
         | 
| 
       87 
87 
     | 
    
         
             
              # Uploads one part file into a bulk import session.
              #
              # op.cmd_parse supplies three values: the session name, the part
              # name, and the path of the file to upload. The file is opened in
              # binary mode and handed to bulk_import_upload_impl together with its
              # size and the retry settings (10 retries, 1 second between attempts).
              def bulk_import_upload_part(op)
                max_retries = 10
                wait_seconds = 1

                name, part_name, path = op.cmd_parse

                File.open(path, "rb") do |part_io|
                  bulk_import_upload_impl(name, part_name, part_io, part_io.size, max_retries, wait_seconds)
                end

                $stderr.puts "Part '#{part_name}' is uploaded."
              end
         
     | 
| 
       98 
99 
     | 
    
         | 
| 
      
 100 
     | 
    
         
            +
              # Uploads several part files into one bulk import session.
              #
              # Options registered on op:
              #   -P, --prefix NAME       string prepended to every part name
              #   -s, --use-suffix COUNT  how many dot-separated components after
              #                           the first are kept in the part name
              #
              # op.cmd_parse supplies the session name followed by the file paths.
              # Each part name is the prefix plus the first (COUNT + 1)
              # dot-separated components of the file's basename.
              def bulk_import_upload_parts(op)
                max_retries = 10
                wait_seconds = 1
                kept_suffixes = 0
                name_prefix = ""

                op.on('-P', '--prefix NAME', 'add prefix to parts name') do |prefix|
                  name_prefix = prefix
                end
                op.on('-s', '--use-suffix COUNT', 'use COUNT number of . (dots) in the source file name to the parts name', Integer) do |count|
                  kept_suffixes = count
                end

                name, *files = op.cmd_parse

                files.each do |source_path|
                  components = File.basename(source_path).split('.')
                  part_name = name_prefix + components[0..kept_suffixes].join('.')

                  File.open(source_path, "rb") do |part_io|
                    byte_count = part_io.size
                    $stderr.puts "Uploading '#{source_path}' -> '#{part_name}'... (#{byte_count} bytes)"

                    bulk_import_upload_impl(name, part_name, part_io, byte_count, max_retries, wait_seconds)
                  end
                end

                $stderr.puts "done."
              end
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
       99 
130 
     | 
    
         
             
              def bulk_import_delete_part(op)
         
     | 
| 
       100 
131 
     | 
    
         
             
                name, part_name = op.cmd_parse
         
     | 
| 
       101 
132 
     | 
    
         | 
| 
         @@ -203,6 +234,105 @@ module Command 
     | 
|
| 
       203 
234 
     | 
    
         
             
                $stderr.puts "Bulk import session '#{name}' is unfrozen."
         
     | 
| 
       204 
235 
     | 
    
         
             
              end
         
     | 
| 
       205 
236 
     | 
    
         | 
| 
      
 237 
     | 
    
         
            +
             
     | 
| 
      
 238 
     | 
    
         
            +
              # Default size threshold, in bytes, at which a new part file is started.
              PART_SPLIT_SIZE = 16*1024*1024

              # Converts input files into gzip-compressed MessagePack part files.
              #
              # Options registered on op (in addition to those FileReader adds via
              # init_optparse):
              #   -s, --split-size SIZE_IN_KB  size threshold per part file
              #   -o, --output DIR             output directory (required)
              #
              # Every record yielded by reader.parse is serialized with to_msgpack
              # and appended to "<DIR>/<stem>_<index>.msgpack.gz", where <stem> is
              # the input basename up to its first dot. Once the on-disk size of the
              # current part exceeds the threshold, it is closed and the next record
              # opens a new part with the index incremented.
              def bulk_import_prepare_part(op)
                output_dir = nil
                part_size_kb = PART_SPLIT_SIZE / 1024  # kb

                require 'td/file_reader'
                reader = FileReader.new
                reader.init_optparse(op)

                op.on('-s', '--split-size SIZE_IN_KB', "size of each parts (default: #{part_size_kb})", Integer) do |kb|
                  part_size_kb = kb
                end
                op.on('-o', '--output DIR', 'output directory') do |dir|
                  output_dir = dir
                end

                *input_paths = op.cmd_parse

                unless output_dir
                  $stderr.puts "-o, --output DIR option is required."
                  exit(1)
                end

                part_size_bytes = part_size_kb * 1024

                require 'fileutils'
                FileUtils.mkdir_p(output_dir)

                require 'json'
                require 'msgpack'
                require 'zlib'

                # Error callback handed to the reader: prints the reason plus the
                # offending data as JSON, or just the reason when the data cannot
                # be serialized.
                report_error = proc do |reason, data|
                  begin
                    $stderr.puts "#{reason}: #{data.to_json}"
                  rescue
                    $stderr.puts "#{reason}"
                  end
                end

                input_paths.each do |input_path|
                  $stderr.puts "Processing #{input_path}..."
                  record_count = 0

                  stem = File.basename(input_path).split('.').first
                  File.open(input_path) do |input|
                    part_index = 0
                    part_file = nil
                    gz = nil
                    begin
                      reader.parse(input, report_error) do |record|
                        # No part is open: start the next one and print a sample
                        # of its first record.
                        unless gz
                          part_filename = "#{stem}_#{part_index}.msgpack.gz"
                          $stderr.puts "  Preparing part \"#{stem}_#{part_index}\"..."
                          part_file = File.open("#{output_dir}/#{part_filename}", 'wb')
                          gz = Zlib::GzipWriter.new(part_file)

                          timestamp = record['time']
                          $stderr.puts "  sample: #{Time.at(timestamp).utc} #{record.to_json}"
                        end

                        gz.write(record.to_msgpack)
                        record_count += 1

                        # Roll over to a new part once this one is large enough.
                        if part_file.size > part_size_bytes
                          gz.close
                          part_index += 1
                          part_file = nil
                          gz = nil
                        end
                      end
                    ensure
                      # Close a part that is still open, also when parsing raised.
                      gz.close if gz
                    end
                    $stderr.puts "  #{input_path}: #{record_count} entries."
                  end
                end
              end
         
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
      
 321 
     | 
    
         
            +
              private
         
     | 
| 
      
 322 
     | 
    
         
            +
              # Uploads one part through the API client, retrying on failure.
              #
              # name        - bulk import session name
              # part_name   - name of the part being uploaded
              # io          - readable stream holding the part data
              # size        - number of bytes to upload
              # retry_limit - how many times a failed upload is retried
              # retry_wait  - seconds to sleep before each retry
              #
              # A new client is obtained from get_client on every attempt. When all
              # retries are used up, the last error is re-raised to the caller.
              def bulk_import_upload_impl(name, part_name, io, size, retry_limit, retry_wait)
                begin
                  client = get_client
                  client.bulk_import_upload_part(name, part_name, io, size)
                rescue => e
                  if retry_limit > 0
                    retry_limit -= 1
                    $stderr.puts "#{e}; retrying '#{part_name}'..."
                    sleep retry_wait
                    # The failed attempt may already have read some or all of io;
                    # go back to the start so the retry sends the whole part
                    # instead of whatever is left.
                    io.rewind if io.respond_to?(:rewind)
                    retry
                  end
                  raise
                end
              end
         
     | 
| 
       206 
336 
     | 
    
         
             
            end
         
     | 
| 
       207 
337 
     | 
    
         
             
            end
         
     | 
| 
       208 
338 
     | 
    
         | 
    
        data/lib/td/command/list.rb
    CHANGED
    
    | 
         @@ -228,7 +228,9 @@ module List 
     | 
|
| 
       228 
228 
     | 
    
         
             
              # bulk_import sub-command entries. NOTE(review): the add_list
              # arguments appear to be command name, argument names, description
              # and usage example -- confirm against add_list's definition.
              add_list 'bulk_import:list', %w[], 'List bulk import sessions', 'bulk_import:list'
              add_list 'bulk_import:show', %w[name], 'Show list of uploaded parts', 'bulk_import:show'
              add_list 'bulk_import:create', %w[name db table], 'Create a new bulk import session to the table', 'bulk_import:create logs_201201 example_db event_logs'
              add_list 'bulk_import:prepare_part', %w[files_], 'Convert files into part file format', 'bulk_import:prepare_part logs/*.csv --format csv --columns time,uid,price,count --time-column "time" -o parts/'
              add_list 'bulk_import:upload_part', %w[name id path.msgpack.gz], 'Upload or re-upload a file into a bulk import session', 'bulk_import:upload_part logs_201201 01h data-201201-01.msgpack.gz'
              # The first positional argument of upload_parts is the session name,
              # so the usage example has to include it before the file list.
              add_list 'bulk_import:upload_parts', %w[name files_], 'Upload or re-upload files into a bulk import session', 'bulk_import:upload_parts logs_201201 parts/* --prefix logs_'
              add_list 'bulk_import:delete_part', %w[name id], 'Delete an uploaded file from a bulk import session', 'bulk_import:delete_part logs_201201 01h'
              add_list 'bulk_import:perform', %w[name], 'Start to validate and convert uploaded files', 'bulk_import:perform logs_201201'
              add_list 'bulk_import:error_records', %w[name], 'Show records which did not pass validations', 'bulk_import:error_records logs_201201'
         
     | 
| 
         @@ -0,0 +1,324 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
             
     | 
| 
      
 2 
     | 
    
         
            +
            module TreasureData
         
     | 
| 
      
 3 
     | 
    
         
            +
              class FileReader
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
                # Record source backed by a MessagePack::Unpacker over an IO.
                class MessagePackParsingReader
                  # io    - stream the unpacker reads from
                  # error - error callback; stored but not called in this class
                  # opts  - accepted for a uniform constructor signature; unused
                  def initialize(io, error, opts)
                    require 'msgpack'
                    @io = io
                    @error = error
                    @unpacker = MessagePack::Unpacker.new(io)
                  end

                  # Returns whatever the unpacker's #next returns.
                  def next
                    @unpacker.next
                  end
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
                # Reads an IO one line at a time.
                class LineReader
                  # io    - stream to read lines from
                  # error - error callback; stored but not called in this class
                  # opts  - options hash; :encoding, when present, is applied to
                  #         io as its external encoding
                  def initialize(io, error, opts)
                    if encoding = opts[:encoding]
                      # IO has no external_encoding= writer; set_encoding is the
                      # method that changes a stream's external encoding.
                      io.set_encoding(encoding)
                    end
                    #@delimiter = opts[:line_delimiter_expr] || /\r?\n/
                    @io = io
                    @error = error
                  end

                  # Returns the next line without its line terminator.
                  # Raises EOFError (from IO#readline) when the input is exhausted.
                  def next_row
                    @io.readline($/).chomp
                  end
                end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                # Splits each row from a reader into an array of column strings.
                class DelimiterParser
                  # reader - object responding to #next_row and returning a String
                  # error  - error callback; not used in this class
                  # opts   - options hash; reads :delimiter_expr (split pattern)
                  #          and :null_expr (pattern marking a field as null)
                  def initialize(reader, error, opts)
                    @reader = reader
                    @delimiter_expr = opts[:delimiter_expr]
                    @null_expr = opts[:null_expr]
                    # TODO
                    #@escape_char = opts[:escape_char]
                    #@quote_char = opts[:quote_char]
                  end

                  # Returns the fields of the next row; any field matching the
                  # null pattern is replaced by nil.
                  def next
                    row = @reader.next_row
                    # Limit -1 keeps trailing empty fields. Without a limit,
                    # String#split drops them, so "a,b,," would lose two columns.
                    fields = row.split(@delimiter_expr, -1)
                    fields.map! {|field|
                      @null_expr =~ field ? nil : field
                    }
                  end
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                # Parses each row from a reader as one JSON document.
                class JSONParser
                  # reader - object responding to #next_row and returning a String
                  # error  - callable invoked as error.call(message, line) for
                  #          every line that fails to parse
                  # opts   - accepted for a uniform constructor signature; unused
                  def initialize(reader, error, opts)
                    # Load json here rather than relying on the caller: if JSON
                    # were undefined, the rescue in #next would swallow the
                    # NameError and report every line as invalid.
                    require 'json'
                    @reader = reader
                    @error = error
                  end

                  # Returns the parsed value of the next line that is valid JSON.
                  # Invalid lines are reported through the error callback and
                  # skipped. Errors raised by the reader itself (for example at
                  # end of input) are not rescued and propagate to the caller.
                  def next
                    while true
                      line = @reader.next_row
                      begin
                        return JSON.parse(line)
                      rescue => e
                        @error.call("invalid json format: #{e}", line)
                      end
                    end
                  end
                end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
                #class ApacheParser
         
     | 
| 
      
 72 
     | 
    
         
            +
                #  REGEXP = /^([^ ]*) [^ ]* ([^ ]*) \[([^\]]*)\] "(\S+)(?: +([^ ]*) +\S*)?" ([^ ]*) ([^ ]*)(?: "([^\"]*)" "([^\"]*)")?$/
         
     | 
| 
      
 73 
     | 
    
         
            +
                #
         
     | 
| 
      
 74 
     | 
    
         
            +
                #  def initialize(reader, error, opts)
         
     | 
| 
      
 75 
     | 
    
         
            +
                #    @reader = reader
         
     | 
| 
      
 76 
     | 
    
         
            +
                #  end
         
     | 
| 
      
 77 
     | 
    
         
            +
                #
         
     | 
| 
      
 78 
     | 
    
         
            +
                #  def next
         
     | 
| 
      
 79 
     | 
    
         
            +
                #    while true
         
     | 
| 
      
 80 
     | 
    
         
            +
                #      m = REGEXP.match(@reader.next_row)
         
     | 
| 
      
 81 
     | 
    
         
            +
                #      if m
         
     | 
| 
      
 82 
     | 
    
         
            +
                #        h = {
         
     | 
| 
      
 83 
     | 
    
         
            +
                #          'host' => m[1],
         
     | 
| 
      
 84 
     | 
    
         
            +
                #          'user' => m[2],
         
     | 
| 
      
 85 
     | 
    
         
            +
                #          'time' => m[3],
         
     | 
| 
      
 86 
     | 
    
         
            +
                #          'method' => m[4],
         
     | 
| 
      
 87 
     | 
    
         
            +
                #          'path' => m[5],
         
     | 
| 
      
 88 
     | 
    
         
            +
                #          'code' => m[6],
         
     | 
| 
      
 89 
     | 
    
         
            +
                #          'size' => m[7].to_i,
         
     | 
| 
      
 90 
     | 
    
         
            +
                #          'referer' => m[8],
         
     | 
| 
      
 91 
     | 
    
         
            +
                #          'agent' => m[9],
         
     | 
| 
      
 92 
     | 
    
         
            +
                #        }
         
     | 
| 
      
 93 
     | 
    
         
            +
                #        return h
         
     | 
| 
      
 94 
     | 
    
         
            +
                #      end
         
     | 
| 
      
 95 
     | 
    
         
            +
                #    end
         
     | 
| 
      
 96 
     | 
    
         
            +
                #  end
         
     | 
| 
      
 97 
     | 
    
         
            +
                #end
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
                # Parser filter that turns integer-looking fields into Integers.
                class AutoTypeConvertParserFilter
                  # parser - upstream parser whose #next returns an Array
                  # error  - accepted for a uniform constructor signature; unused
                  def initialize(parser, error)
                    @parser = parser
                  end

                  # Returns the upstream array, modified in place: a field is
                  # replaced by its integer value only when converting that integer
                  # back to a string reproduces the field exactly. Everything else,
                  # including nil, is left as is.
                  def next
                    values = @parser.next
                    values.map! do |field|
                      # nil.to_i is 0 and "0" != nil, so nil passes through.
                      as_integer = field.to_i
                      if as_integer.to_s == field
                        as_integer
                      else
                        field
                      end
                    end
                  end
                end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
                # Parser filter that pairs column names with row values.
                class HashBuilder
                  # parser  - upstream parser whose #next returns an Array
                  # error   - accepted for a uniform constructor signature; unused
                  # columns - Array of column names used as hash keys
                  def initialize(parser, error, columns)
                    @parser = parser
                    @columns = columns
                  end

                  # Returns a Hash mapping each column name to the value at the same
                  # position in the next upstream row. Missing values become nil and
                  # values beyond the last column are dropped (Array#zip semantics).
                  def next
                    values = @parser.next
                    # FIXME error handling
                    pairs = @columns.zip(values)
                    Hash[pairs]
                  end
                end
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
                # Parser filter that derives the integer 'time' field of each row from a
                # user-selected time column. Rows whose time cannot be determined are
                # reported through the error callback and skipped.
                class TimeParserFilter
                  # Loaded at class-definition time (not lazily in #initialize) so that the
                  # Time.strptime capability check at the bottom of this class sees the
                  # methods that the 'time' library adds.
                  require 'time'

                  # parser:: upstream object whose #next returns one row (indexable by column name)
                  # error::  callable invoked as error.call(message, row) for rejected rows
                  # opts::   Hash; :time_column is required, :time_format is optional
                  #
                  # Raises RuntimeError when opts[:time_column] is not set.
                  def initialize(parser, error, opts)
                    @parser = parser
                    @error = error
                    @time_column = opts[:time_column]
                    unless @time_column
                      raise '-t, --time-column NAME option is required'
                    end
                    @time_format = opts[:time_format]
                  end

                  # Returns the next row that has a usable time value, with row['time'] set
                  # to an Integer. Rows with a missing or unparsable time column are passed
                  # to the error callback and skipped. Whatever the upstream parser raises
                  # from #next propagates to the caller.
                  def next
                    while true
                      row = @parser.next
                      tval = row[@time_column]

                      unless tval
                        @error.call("time column '#{@time_column}' is missing", row)
                        next
                      end

                      begin
                        if tf = @time_format
                          # The value may already have been converted to an Integer by an
                          # earlier filter (e.g. "20120827" with format %Y%m%d); strptime
                          # only accepts strings, so convert back before parsing.
                          row['time'] = parse_time(tval.to_s, tf).to_i
                        elsif tval.is_a?(Integer)
                          # No format given and already an integer: used as-is.
                          row['time'] = tval
                        else
                          row['time'] = Time.parse(tval).to_i
                        end
                        return row

                      rescue => e
                        @error.call("invalid time format '#{tval}': #{e}", row)
                        next
                      end
                    end
                  end

                  if Time.respond_to?(:strptime)
                    # Parses +value+ according to the strptime-style +format+.
                    def parse_time(value, format)
                      Time.strptime(value, format)
                    end
                  else
                    # Fallback for runtimes whose 'time' library lacks Time.strptime.
                    def parse_time(value, format)
                      Time.parse(DateTime.strptime(value, format).to_s)
                    end
                  end
                end
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                # Sets up a reader with the "text" format, the built-in default options,
                # no user-supplied options and no parser class.
                def initialize
                  @parser_class = nil
                  @opts = {}
                  @format = "text"

                  # Defaults; compose_factory merges user options over these.
                  defaults = {}
                  defaults[:delimiter_expr] = /\t|,/
                  #defaults[:line_delimiter_expr] = /\r?\n/
                  defaults[:null_expr] = /\A(?:\\N|\-|)\z/
                  #defaults[:quote_char] = "\""
                  @default_opts = defaults
                end
         
     | 
| 
      
 188 
     | 
    
         
            +
             
     | 
| 
      
 189 
     | 
    
         
            +
                attr_reader :default_opts, :opts
         
     | 
| 
      
 190 
     | 
    
         
            +
                attr_accessor :parser_class
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
                # Registers the file-reading command line switches on +op+ (an object
                # responding to #on, such as an OptionParser). Each switch stores its
                # value in @opts; -f delegates to set_format_template.
                def init_optparse(op)
                  op.on('-f', '--format NAME', "source file format") {|s|
                    set_format_template(s)
                  }
                  op.on('-h', '--columns NAME,NAME,...', 'column names') {|s|
                    @opts[:column_names] = s.split(',')
                  }
                  op.on('-H', '--column-header', 'first line includes column names', TrueClass) {|b|
                    @opts[:column_header] = b
                  }
                  # Regexp#source yields the pattern text without the surrounding slashes.
                  op.on('-d', '--delimiter REGEX', "delimiter between columns (default: #{@default_opts[:delimiter_expr].source})") {|s|
                    @opts[:delimiter_expr] = Regexp.new(s)
                  }
                  #op.on('-D', '--line-delimiter REGEX', "delimiter between rows (default: #{@default_opts[:line_delimiter_expr].inspect[1..-2]})") {|s|
                  #  @opts[:line_delimiter_expr] = Regexp.new(s)
                  #}
                  op.on('-N', '--null REGEX', "null expression (default: #{@default_opts[:null_expr].source})") {|s|
                    @opts[:null_expr] = Regexp.new(s)
                  }
                  # TODO
                  #op.on('-E', '--escape CHAR', "escape character (default: no escape character)") {|s|
                  #  @opts[:escape_char] = s
                  #}
                  #op.on('-Q', '--quote CHAR', "quote character (default: #{@default_opts[:quote_char]}") {|s|
                  #  @opts[:quote_char] = s
                  #}
                  op.on('-S', '--all-string', 'disable automatic type conversion', TrueClass) {|b|
                    @opts[:all_string] = b
                  }
                  # NOTE(review): the help text promises auto detection, but
                  # TimeParserFilter#initialize raises when :time_column is unset -
                  # confirm which behavior is intended.
                  op.on('-t', '--time-column NAME', 'name of the time column (default: auto detect)') {|s|
                    @opts[:time_column] = s
                  }
                  op.on('-T', '--time-format FORMAT', 'strftime(3) format of the time column') {|s|
                    @opts[:time_format] = s
                  }
                  op.on('-e', '--encoding NAME', "text encoding") {|s|
                    @opts[:encoding] = s
                  }
                  op.on('-C', '--compress NAME', 'compression format name [plain, gzip] (default: auto detect)') {|s|
                    @opts[:compress] = s
                  }
                end
         
     | 
| 
      
 234 
     | 
    
         
            +
             
     | 
| 
      
 235 
     | 
    
         
            +
                # Applies the preset for the named source format: sets @format and, for
                # the delimited text presets, the column delimiter in @opts.
                # Raises RuntimeError for a name that is not a known preset.
                def set_format_template(name)
                  # Delimited-text presets; both are handled by the 'text' format.
                  text_delimiters = {
                    'csv' => /,/,
                    'tsv' => /\t/,
                  }

                  if text_delimiters.has_key?(name)
                    @format = 'text'
                    @opts[:delimiter_expr] = text_delimiters[name]
                    return
                  end

                  # Disabled preset, kept for reference:
                  #when 'apache'
                  #  @format = 'apache'
                  #  @opts[:column_names] = ['host', 'user', 'time', 'method', 'path', 'code', 'size', 'referer', 'agent']
                  #  @opts[:null_expr] = /\A(?:\-|)\z/
                  #  @opts[:time_column] = 'time'
                  #  @opts[:time_format] = '%d/%b/%Y:%H:%M:%S %z'

                  if 'msgpack' == name
                    @format = 'msgpack'
                  elsif 'json' == name
                    @format = 'json'
                  else
                    raise "Unknown format: #{name}"
                  end
                end
         
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
                # Builds the parser factory for the current @format. The result is a Proc
                # taking (io, error) that assembles the reader/parser/filter chain and
                # returns its outermost stage, always a TimeParserFilter. User options in
                # @opts take precedence over @default_opts. Returns nil when @format is
                # none of 'text', 'json' or 'msgpack'.
                def compose_factory
                  opts = @default_opts.merge(@opts)

                  case @format
                  when 'text'
                    Proc.new do |io, error|
                      lines = LineReader.new(io, error, opts)
                      stage = DelimiterParser.new(lines, error, opts)

                      # Column names come from the first row or from the --columns option;
                      # the text format cannot build records without them.
                      columns =
                        if opts[:column_header]
                          stage.next
                        elsif opts[:column_names]
                          opts[:column_names]
                        else
                          raise "--column-header or --columns option is required"
                        end

                      stage = AutoTypeConvertParserFilter.new(stage, error) unless opts[:all_string]
                      stage = HashBuilder.new(stage, error, columns)
                      TimeParserFilter.new(stage, error, opts)
                    end

                  #when 'apache'

                  when 'json'
                    Proc.new do |io, error|
                      lines = LineReader.new(io, error, opts)
                      stage = JSONParser.new(lines, error, opts)

                      # Column names are optional here; HashBuilder is only added when given.
                      columns = opts[:column_header] ? stage.next : opts[:column_names]
                      stage = HashBuilder.new(stage, error, columns) if columns
                      TimeParserFilter.new(stage, error, opts)
                    end

                  when 'msgpack'
                    Proc.new do |io, error|
                      stage = MessagePackParsingReader.new(io, error, opts)

                      # Column names are optional here; HashBuilder is only added when given.
                      columns = opts[:column_header] ? stage.next : opts[:column_names]
                      stage = HashBuilder.new(stage, error, columns) if columns
                      TimeParserFilter.new(stage, error, opts)
                    end
                  end
                end
         
     | 
| 
      
 311 
     | 
    
         
            +
             
     | 
| 
      
 312 
     | 
    
         
            +
                # Reads records from +io+ and passes each one to the given block.
                # +error+ is handed to the parser chain built by compose_factory.
                # Stops when the parser returns nil/false or raises EOFError. Returns nil.
                def parse(io, error, &block)
                  parser = compose_factory.call(io, error)
                  begin
                    record = parser.next
                    while record
                      block.call(record)
                      record = parser.next
                    end
                  rescue EOFError
                    # EOFError raised inside this loop ends the parse quietly.
                  end
                end
         
     | 
| 
      
 322 
     | 
    
         
            +
             
     | 
| 
      
 323 
     | 
    
         
            +
              end
         
     | 
| 
      
 324 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/td/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: td
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.10.38
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.10.39
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,7 +9,7 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2012-08- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2012-08-27 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: msgpack
         
     | 
| 
         @@ -163,6 +163,7 @@ files: 
     | 
|
| 
       163 
163 
     | 
    
         
             
            - lib/td/compat_gzip_reader.rb
         
     | 
| 
       164 
164 
     | 
    
         
             
            - lib/td/config.rb
         
     | 
| 
       165 
165 
     | 
    
         
             
            - lib/td/distribution.rb
         
     | 
| 
      
 166 
     | 
    
         
            +
            - lib/td/file_reader.rb
         
     | 
| 
       166 
167 
     | 
    
         
             
            - lib/td/version.rb
         
     | 
| 
       167 
168 
     | 
    
         
             
            - ChangeLog
         
     | 
| 
       168 
169 
     | 
    
         
             
            - README.rdoc
         
     |