http-log-parser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +30 -0
- data/Rakefile +36 -0
- data/lib/http/parser.rb +96 -0
- data/lib/http_log_parser.rb +4 -0
- metadata +65 -0
    
        data/README.rdoc
    ADDED
    
    | @@ -0,0 +1,30 @@ | |
| 1 | 
            +
            = LogParser
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            == Introduction
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            This gem provides an easy-to-use parser for various HTTP log formats.
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            == Installation
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            Just run:
         | 
| 10 | 
            +
             | 
| 11 | 
            +
                gem install http-log-parser
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            That should take care of it.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            == Usage
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                require 'http_log_parser'
         | 
| 18 | 
            +
                
         | 
| 19 | 
            +
                parser = HttpLogParser.new
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                File.open('/path/to.log', 'r') do |file|
         | 
| 22 | 
            +
                  while(line = file.gets)
         | 
| 23 | 
            +
                    parsed_data = parser.parse_line(line)
         | 
| 24 | 
            +
                    p parsed_data
         | 
| 25 | 
            +
                  end
         | 
| 26 | 
            +
                end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            == License
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            This code is made available under the MIT license. It is based on code from Jan Wikholm.
         | 
    
        data/Rakefile
    ADDED
    
    | @@ -0,0 +1,36 @@ | |
| 1 | 
            +
            # Build script for the http-log-parser gem: declares the gem
            # specification, a packaging task, an RDoc task, and a default task
            # that depends on the packaged .gem file.
            require 'rubygems'
            require 'rake/gempackagetask'
            require 'rake/rdoctask'

            # Gem specification; also used below to derive the name of the .gem
            # file the default task depends on.
            spec = Gem::Specification.new do |s|
              s.name        = "http-log-parser"
              s.version     = "0.0.1"
              s.author      = "Torsten Curdt"
              s.email       = "tcurdt at vafer.org"
              s.homepage    = "http://github.com/tcurdt/http-log-parser"
              s.description = "HTTP log file parser"
              s.summary     = "A package for parsing web server logs."

              s.platform         = Gem::Platform::RUBY
              s.has_rdoc         = true
              s.extra_rdoc_files = ["README.rdoc"]

              s.require_path = "lib"
              s.files        = %w(README.rdoc Rakefile) + Dir.glob("lib/**/*")
            end

            # Packaging task for the gem; need_tar additionally requests a tarball.
            Rake::GemPackageTask.new(spec) do |pkg|
              pkg.need_tar = true
            end

            # API documentation task, writing into the rdoc/ directory.
            Rake::RDocTask.new(:rdoc) do |rdoc|
              rdoc.rdoc_dir = 'rdoc'
              rdoc.title    = 'HttpLogParser'
              rdoc.options << '--line-numbers' << '--inline-source'
              # The README ships as README.rdoc (see s.files above); the former
              # 'README' pattern did not name that file.
              rdoc.rdoc_files.include('README.rdoc')
              rdoc.rdoc_files.include('lib/**/*.rb')
            end

            # Default task: depends on the packaged gem file, then reports success.
            task :default => "pkg/#{spec.name}-#{spec.version}.gem" do
              puts "generated latest version"
            end
         | 
    
        data/lib/http/parser.rb
    ADDED
    
    | @@ -0,0 +1,96 @@ | |
| 1 | 
            +
            # Describes one log layout, given as a format string made of
            # %-directives (see DIRECTIVES), and compiles it into a regular
            # expression together with the ordered list of field names that the
            # expression's capture groups produce.
            class HttpLogFormat
              attr_reader :name, :format, :format_symbols, :format_regex

              # Directive character => [field name, pattern matching that field].
              DIRECTIVES = {
                'h' => [:ip, /\d+\.\d+\.\d+\.\d+/],
                'l' => [:auth, /.*?/],
                'u' => [:username, /.*?/],
                't' => [:datetime, /\[.*?\]/],
                'r' => [:request, /.*?/],
                's' => [:status, /\d+/],
                'b' => [:bytecount, /-|\d+/],
                'v' => [:domain, /.*?/],
                'i' => [:header_lines, /.*?/],
                'e' => [:errorlevel, /\[.*?\]/],
              }.freeze

              # Matches one "%<condition>{<subdirective>}<directive char>" token
              # followed by any run of whitespace, backslashes and double quotes.
              # The character class is built from the bare directive letters only;
              # interpolating an Array here would leak its inspect punctuation
              # (quotes, pipes) into the class on Ruby >= 1.9.
              FORMAT_DIRECTIVE = /%(.*?)(\{.*?\})?([#{Regexp.escape(DIRECTIVES.keys.join)}])([\s\\"]*)/

              # name   - identifier for this format (stored as given).
              # format - format string, e.g. '%h %l %u %t \"%r\" %>s %b'.
              def initialize(name, format)
                @name, @format = name, format
                parse_format(format)
              end

              # Scans +format+ for directives and sets @format_symbols (field names
              # in order of appearance) and @format_regex (one capture group per
              # directive, anchored with ^). Text in the format string that is not
              # part of a directive or its trailing whitespace/backslash/quote run
              # is not represented in the resulting expression.
              #
              # Returns the compiled Regexp.
              def parse_format(format)
                symbols = []
                pieces = []
                format.scan(FORMAT_DIRECTIVE) do |condition, subdirective, directive_char, ignored|
                  symbol, match_regex = process_directive(directive_char, subdirective, condition)
                  # Whitespace in the format matches any single whitespace character
                  # in the log line. Backslash-quote pairs are kept verbatim and act
                  # as an escaped quote inside the regular expression.
                  separator = ignored.to_s.gsub(/\s/) { '\\s' }
                  symbols << symbol
                  pieces << "(#{match_regex})#{separator}"
                end
                @format_symbols = symbols
                @format_regex = /^#{pieces.join}/
              end

              # Resolves a single directive to [field name, regex source].
              #
              # directive_char - one of the DIRECTIVES keys.
              # subdirective   - the "{...}" part including braces, or nil.
              # condition      - text between "%" and the directive (currently unused).
              #
              # For 'i' with a subdirective the field name is derived from the
              # braces' content (downcased, dashes turned into underscores); without
              # one the generic :header_lines name is used.
              #
              # Raises ArgumentError when directive_char is not a known directive.
              def process_directive(directive_char, subdirective, condition)
                symbol, pattern = DIRECTIVES[directive_char]
                raise ArgumentError, "unknown directive: #{directive_char.inspect}" if pattern.nil?

                if directive_char == 'i' && subdirective
                  symbol = subdirective[1...-1].downcase.tr('-', '_').to_sym
                end
                [symbol, pattern.source]
              end
            end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            # Parses single log lines by trying each known HttpLogFormat and
            # returning the captured fields as a Hash.
            class HttpLogParser

              # Built-in format name => format string handed to HttpLogFormat.
              LOG_FORMATS = {
                :common => '%h %l %u %t \"%r\" %>s %b',
                :common_with_virtual => '%v %h %l %u %t \"%r\" %>s %b',
                :combined => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
                :combined_with_virtual => '%v %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"',
                :combined_with_cookies => '%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\" \"%{Cookies}i\"'
              }.freeze

              # Hash of format name => HttpLogFormat consulted by check_format.
              attr_reader :known_formats

              def initialize
                initialize_known_formats
              end

              # (Re)builds @known_formats from LOG_FORMATS.
              def initialize_known_formats
                @known_formats = {}
                LOG_FORMATS.each do |name, format|
                  @known_formats[name] = HttpLogFormat.new(name, format)
                end
              end

              # Returns the name of the first known format whose expression matches
              # +line+, or :unknown when none does. Formats are tried from the
              # longest expression source to the shortest: the expressions are not
              # anchored at the end, so a shorter format can match a prefix of a
              # line written in a longer one. The order is recomputed on every call
              # so that entries added to known_formats later are honoured.
              def check_format(line)
                by_length = @known_formats.sort_by { |_key, log_format| log_format.format_regex.source.size }
                by_length.reverse_each do |key, log_format|
                  return key if line.match(log_format.format_regex)
                end
                :unknown
              end

              # Parses one log line.
              #
              # Returns a Hash mapping each field name of the detected format to the
              # captured String, plus:
              #   :datetime - surrounding brackets stripped, when present
              #   :domain   - falls back to the value of :ip when not captured
              #   :format   - the detected format name
              #
              # Raises ArgumentError when the line matches no known format.
              def parse_line(line)
                @format = check_format(line)
                log_format = @known_formats[@format]
                # A single match yields the captures of the first occurrence only,
                # even if +line+ happens to contain several log lines.
                match = log_format && log_format.format_regex.match(line)
                unless match
                  raise ArgumentError, "line does not match any known log format: #{line.inspect}"
                end

                parsed_data = Hash[log_format.format_symbols.zip(match.captures)]

                parsed_data[:datetime] = parsed_data[:datetime][1...-1] if parsed_data[:datetime]
                parsed_data[:domain] = parsed_data[:ip] unless parsed_data[:domain]
                parsed_data[:format] = @format

                parsed_data
              end
            end
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,65 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification 
         | 
| 2 | 
            +
            name: http-log-parser
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            +
              prerelease: false
         | 
| 5 | 
            +
              segments: 
         | 
| 6 | 
            +
              - 0
         | 
| 7 | 
            +
              - 0
         | 
| 8 | 
            +
              - 1
         | 
| 9 | 
            +
              version: 0.0.1
         | 
| 10 | 
            +
            platform: ruby
         | 
| 11 | 
            +
            authors: 
         | 
| 12 | 
            +
            - Torsten Curdt
         | 
| 13 | 
            +
            autorequire: 
         | 
| 14 | 
            +
            bindir: bin
         | 
| 15 | 
            +
            cert_chain: []
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            date: 2010-04-22 00:00:00 +02:00
         | 
| 18 | 
            +
            default_executable: 
         | 
| 19 | 
            +
            dependencies: []
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            description: HTTP log file parser
         | 
| 22 | 
            +
            email: tcurdt at vafer.org
         | 
| 23 | 
            +
            executables: []
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            extensions: []
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            extra_rdoc_files: 
         | 
| 28 | 
            +
            - README.rdoc
         | 
| 29 | 
            +
            files: 
         | 
| 30 | 
            +
            - README.rdoc
         | 
| 31 | 
            +
            - Rakefile
         | 
| 32 | 
            +
            - lib/http/parser.rb
         | 
| 33 | 
            +
            - lib/http_log_parser.rb
         | 
| 34 | 
            +
            has_rdoc: true
         | 
| 35 | 
            +
            homepage: http://github.com/tcurdt/http-log-parser
         | 
| 36 | 
            +
            licenses: []
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            post_install_message: 
         | 
| 39 | 
            +
            rdoc_options: []
         | 
| 40 | 
            +
             | 
| 41 | 
            +
            require_paths: 
         | 
| 42 | 
            +
            - lib
         | 
| 43 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement 
         | 
| 44 | 
            +
              requirements: 
         | 
| 45 | 
            +
              - - ">="
         | 
| 46 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 47 | 
            +
                  segments: 
         | 
| 48 | 
            +
                  - 0
         | 
| 49 | 
            +
                  version: "0"
         | 
| 50 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement 
         | 
| 51 | 
            +
              requirements: 
         | 
| 52 | 
            +
              - - ">="
         | 
| 53 | 
            +
                - !ruby/object:Gem::Version 
         | 
| 54 | 
            +
                  segments: 
         | 
| 55 | 
            +
                  - 0
         | 
| 56 | 
            +
                  version: "0"
         | 
| 57 | 
            +
            requirements: []
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            rubyforge_project: 
         | 
| 60 | 
            +
            rubygems_version: 1.3.6
         | 
| 61 | 
            +
            signing_key: 
         | 
| 62 | 
            +
            specification_version: 3
         | 
| 63 | 
            +
            summary: A package for parsing web server logs.
         | 
| 64 | 
            +
            test_files: []
         | 
| 65 | 
            +
             |