log_sense 1.3.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.org +46 -0
- data/Gemfile.lock +4 -4
- data/README.org +24 -10
- data/Rakefile +17 -3
- data/exe/log_sense +24 -16
- data/ip_locations/dbip-country-lite.sqlite3 +0 -0
- data/lib/log_sense/apache_data_cruncher.rb +30 -30
- data/lib/log_sense/apache_log_line_parser.rb +12 -13
- data/lib/log_sense/apache_log_parser.rb +44 -36
- data/lib/log_sense/emitter.rb +518 -15
- data/lib/log_sense/ip_locator.rb +26 -19
- data/lib/log_sense/options_parser.rb +35 -30
- data/lib/log_sense/rails_data_cruncher.rb +8 -4
- data/lib/log_sense/rails_log_parser.rb +108 -100
- data/lib/log_sense/templates/_command_invocation.html.erb +0 -4
- data/lib/log_sense/templates/_command_invocation.txt.erb +4 -3
- data/lib/log_sense/templates/_navigation.html.erb +21 -0
- data/lib/log_sense/templates/_output_table.html.erb +2 -7
- data/lib/log_sense/templates/_output_table.txt.erb +14 -0
- data/lib/log_sense/templates/_performance.html.erb +1 -1
- data/lib/log_sense/templates/_performance.txt.erb +8 -5
- data/lib/log_sense/templates/_report_data.html.erb +2 -2
- data/lib/log_sense/templates/_summary.html.erb +6 -1
- data/lib/log_sense/templates/_summary.txt.erb +11 -8
- data/lib/log_sense/templates/_warning.txt.erb +1 -0
- data/lib/log_sense/templates/apache.html.erb +14 -335
- data/lib/log_sense/templates/apache.txt.erb +22 -0
- data/lib/log_sense/templates/rails.html.erb +13 -174
- data/lib/log_sense/templates/rails.txt.erb +10 -60
- data/lib/log_sense/version.rb +1 -1
- metadata +6 -2
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: '0128717b2ba709bc5dfbb7b755762a757c575ad4307159910cc894aaa3b88f42'
         | 
| 4 | 
            +
              data.tar.gz: e05054b8eee79a439f5b077e60bc0d95a3e7706c550853333d7d631c458abf91
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 9d9e3dc495f7479292ae96d1bf6298f531258cae74df47ff705aea9613880b0d50aa6a19328f70685484ad1c606bd12a8fb7c87632fe5c9cbefefe4893d9bb4d
         | 
| 7 | 
            +
              data.tar.gz: b417049bcc119ed82ab4c33d007e15804ba85b2485308ca28448fba512814fc5e8b8b215310bfb5c8108fcdc87292322eefc6826b18718fcbc7fced29eea77cb
         | 
    
        data/CHANGELOG.org
    CHANGED
    
    | @@ -2,6 +2,52 @@ | |
| 2 2 | 
             
            #+AUTHOR: Adolfo Villafiorita
         | 
| 3 3 | 
             
            #+STARTUP: showall
         | 
| 4 4 |  | 
| 5 | 
            +
            * 1.5.0
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            - [User] Present Unique Visits / day as integer
         | 
| 8 | 
            +
            - [User] Added Country and Streaks report for rails
         | 
| 9 | 
            +
            - [User] Changed Streak report in Apache
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            - [Gem] Updated DBIP
         | 
| 12 | 
            +
            - [Gem] Updated Bundle  
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            - [Code] Refactored all reports, so that they are specified
         | 
| 15 | 
            +
              in the same way  
         | 
| 16 | 
            +
            - [Code] Refactor warning message in textual reports
         | 
| 17 | 
            +
            - [Code] Build HTML menu for report specification
         | 
| 18 | 
            +
            - [Code] Various refactoring passes on the code
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            * 1.4.1
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            - [User] New textual report for Apache
         | 
| 23 | 
            +
            - [User] New option -w sets maximum width of URL, Path, and
         | 
| 24 | 
            +
              Description columns in textual reports
         | 
| 25 | 
            +
            - [User] Removed option -i, since input filenames are now taken
         | 
| 26 | 
            +
              as direct arguments
         | 
| 27 | 
            +
            - [User] Allow multiple files in input
         | 
| 28 | 
            +
            - [Fixed] Complain if input format is not supported
         | 
| 29 | 
            +
            - [Code] Refactoring of reports to manage better output to
         | 
| 30 | 
            +
              multiple formats  
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            * 1.4.0
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            - [User] The Apache Log report now organizes page requests in four
         | 
| 35 | 
            +
              tables:
         | 
| 36 | 
            +
              - success on HTML pages
         | 
| 37 | 
            +
              - success on other resources
         | 
| 38 | 
            +
              - failures on HTML pages
         | 
| 39 | 
            +
              - failures on other resources
         | 
| 40 | 
            +
            - [User] Increased the default limit of pages in reports to 900
         | 
| 41 | 
            +
            - [User] The return status is now included in the page and resources
         | 
| 42 | 
            +
              reports
         | 
| 43 | 
            +
            - [User] The "Attack" table has been removed, since the data can be
         | 
| 44 | 
            +
              gotten from the previous tables
         | 
| 45 | 
            +
            - [Fixed] HTML pages are those with extension ".html" and ".htm"
         | 
| 46 | 
            +
            - [Fixed] Wrong data on summary table of the apache report has
         | 
| 47 | 
            +
              been fixed
         | 
| 48 | 
            +
            - [Fixed] Better JavaScript escaping to avoid log poisoning
         | 
| 49 | 
            +
            - [Fixed] Strengthened the Apache log parser
         | 
| 50 | 
            +
             | 
| 5 51 | 
             
            * 1.3.3 and 1.3.4
         | 
| 6 52 |  | 
| 7 53 | 
             
            - [Gem] Moved repository to Github and fixes to gemspec
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                log_sense (1. | 
| 4 | 
            +
                log_sense (1.4.2)
         | 
| 5 5 | 
             
                  browser
         | 
| 6 6 | 
             
                  ipaddr
         | 
| 7 7 | 
             
                  iso_country_codes
         | 
| @@ -13,9 +13,9 @@ GEM | |
| 13 13 | 
             
              specs:
         | 
| 14 14 | 
             
                browser (5.3.1)
         | 
| 15 15 | 
             
                byebug (11.1.3)
         | 
| 16 | 
            -
                ipaddr (1.2. | 
| 16 | 
            +
                ipaddr (1.2.4)
         | 
| 17 17 | 
             
                iso_country_codes (0.7.8)
         | 
| 18 | 
            -
                minitest (5. | 
| 18 | 
            +
                minitest (5.15.0)
         | 
| 19 19 | 
             
                rake (12.3.3)
         | 
| 20 20 | 
             
                sqlite3 (1.4.2)
         | 
| 21 21 | 
             
                terminal-table (3.0.2)
         | 
| @@ -32,4 +32,4 @@ DEPENDENCIES | |
| 32 32 | 
             
              rake (~> 12.0)
         | 
| 33 33 |  | 
| 34 34 | 
             
            BUNDLED WITH
         | 
| 35 | 
            -
               2. | 
| 35 | 
            +
               2.3.3
         | 
    
        data/README.org
    CHANGED
    
    | @@ -19,8 +19,6 @@ LogSense reports the following data: | |
| 19 19 | 
             
            - OS, browsers, and devices
         | 
| 20 20 | 
             
            - IP Country location, thanks to the DB-IP lite country DB
         | 
| 21 21 | 
             
            - Streaks: resources accessed by a given IP over time
         | 
| 22 | 
            -
            - Potential attacks: access to resources which are not meant to be
         | 
| 23 | 
            -
              served by a web server serving static websites
         | 
| 24 22 | 
             
            - Performance of Rails requests
         | 
| 25 23 |  | 
| 26 24 | 
             
            Filters from the command line allow you to analyze specific periods and
         | 
| @@ -33,6 +31,18 @@ And, of course, the compulsory screenshot: | |
| 33 31 | 
             
            #+ATTR_HTML: :width 80%
         | 
| 34 32 | 
             
            [[file:./apache-screenshot.png]]
         | 
| 35 33 |  | 
| 34 | 
            +
             | 
| 35 | 
            +
            * An important word of warning
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            [[https://owasp.org/www-community/attacks/Log_Injection][Log poisoning]] is a technique whereby attackers send requests with unvalidated
         | 
| 38 | 
            +
            user input to forge log entries or inject malicious content into the logs.
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            log_sense sanitizes entries of HTML reports, to try and protect from log
         | 
| 41 | 
            +
            poisoning.  *Log entries and URLs in SQLite3, however, are not sanitized*:
         | 
| 42 | 
            +
            they are stored and read from the log.  This is not, in general, an issue,
         | 
| 43 | 
            +
            unless you use the data from SQLite in environments in which URLs can be
         | 
| 44 | 
            +
            opened or code executed.
         | 
| 45 | 
            +
             | 
| 36 46 | 
             
            * Motivation
         | 
| 37 47 |  | 
| 38 48 | 
             
            LogSense moves along the lines of tools such as [[https://goaccess.io/][GoAccess]] (which
         | 
| @@ -54,6 +64,7 @@ generated files are then made available on a private area on the web. | |
| 54 64 | 
             
              gem install log_sense
         | 
| 55 65 | 
             
              #+end_src
         | 
| 56 66 |  | 
| 67 | 
            +
             | 
| 57 68 | 
             
            * Usage
         | 
| 58 69 |  | 
| 59 70 | 
             
              #+begin_src bash :results raw output :wrap example
         | 
| @@ -62,21 +73,22 @@ generated files are then made available on a private area on the web. | |
| 62 73 |  | 
| 63 74 | 
             
              #+RESULTS:
         | 
| 64 75 | 
             
              #+begin_example
         | 
| 65 | 
            -
              Usage: log_sense [options] [logfile]
         | 
| 76 | 
            +
              Usage: log_sense [options] [logfile ...]
         | 
| 66 77 | 
             
                      --title=TITLE                Title to use in the report
         | 
| 67 78 | 
             
                  -f, --input-format=FORMAT        Input format (either rails or apache)
         | 
| 68 | 
            -
                  -i, --input-file=INPUT_FILE      Input file
         | 
| 69 79 | 
             
                  -t, --output-format=FORMAT       Output format: html, org, txt, sqlite. See below for available formats
         | 
| 70 80 | 
             
                  -o, --output-file=OUTPUT_FILE    Output file
         | 
| 71 81 | 
             
                  -b, --begin=DATE                 Consider entries after or on DATE
         | 
| 72 82 | 
             
                  -e, --end=DATE                   Consider entries before or on DATE
         | 
| 73 | 
            -
                  -l, --limit=N                     | 
| 83 | 
            +
                  -l, --limit=N                    Limit to the N most requested resources (defaults to 900)
         | 
| 84 | 
            +
                  -w, --width=WIDTH                Maximum width of URL and description columns in text reports
         | 
| 74 85 | 
             
                  -c, --crawlers=POLICY            Decide what to do with crawlers (applies to Apache Logs)
         | 
| 75 86 | 
             
                  -n, --no-selfpolls               Ignore self poll entries (requests from ::1; applies to Apache Logs)
         | 
| 87 | 
            +
                      --verbose                    Inform about progress (prints to STDERR)
         | 
| 76 88 | 
             
                  -v, --version                    Prints version information
         | 
| 77 89 | 
             
                  -h, --help                       Prints this help
         | 
| 78 90 |  | 
| 79 | 
            -
              This is version 1. | 
| 91 | 
            +
              This is version 1.5.0
         | 
| 80 92 |  | 
| 81 93 | 
             
              Output formats
         | 
| 82 94 | 
             
              rails parsing can produce the following outputs:
         | 
| @@ -85,6 +97,7 @@ generated files are then made available on a private area on the web. | |
| 85 97 | 
             
                - html
         | 
| 86 98 | 
             
              apache parsing can produce the following outputs:
         | 
| 87 99 | 
             
                - sqlite
         | 
| 100 | 
            +
                - txt
         | 
| 88 101 | 
             
                - html
         | 
| 89 102 | 
             
              #+end_example
         | 
| 90 103 |  | 
| @@ -95,6 +108,7 @@ log_sense -f apache -i access.log -t txt > access-data.txt | |
| 95 108 | 
             
            log_sense -f rails -i production.log -t html -o performance.txt
         | 
| 96 109 | 
             
            #+end_example
         | 
| 97 110 |  | 
| 111 | 
            +
             | 
| 98 112 | 
             
            * Change Log
         | 
| 99 113 |  | 
| 100 114 | 
             
            See the [[file:CHANGELOG.org][CHANGELOG]] file.
         | 
| @@ -109,8 +123,8 @@ Concerning the outputs: | |
| 109 123 | 
             
            - HTML reports use [[https://get.foundation/][Zurb Foundation]], [[https://www.datatables.net/][Data Tables]], and [[https://vega.github.io/vega-lite/][Vega-Lite]], which
         | 
| 110 124 | 
             
              are all downloaded from a CDN
         | 
| 111 125 | 
             
            - The textual format is compatible with [[https://orgmode.org/][Org Mode]] and can be further
         | 
| 112 | 
            -
              processed to any format [[https://orgmode.org/][Org Mode]] can be exported to  | 
| 113 | 
            -
              and PDF | 
| 126 | 
            +
              processed to any format [[https://orgmode.org/][Org Mode]] can be exported to, including HTML
         | 
| 127 | 
            +
              and PDF, with the word of warning in the section above. 
         | 
| 114 128 |  | 
| 115 129 | 
             
            * Author and Contributors
         | 
| 116 130 |  | 
| @@ -118,8 +132,8 @@ Concerning the outputs: | |
| 118 132 |  | 
| 119 133 | 
             
            * Known Bugs
         | 
| 120 134 |  | 
| 121 | 
            -
            No known bugs; an unknown number of unknown bugs.
         | 
| 122 | 
            -
             | 
| 135 | 
            +
            No known bugs; an unknown number of unknown bugs.  (See the open issues for
         | 
| 136 | 
            +
            the known bugs.)
         | 
| 123 137 |  | 
| 124 138 | 
             
            * License
         | 
| 125 139 |  | 
    
        data/Rakefile
    CHANGED
    
    | @@ -9,7 +9,21 @@ end | |
| 9 9 | 
             
            require_relative './lib/log_sense/ip_locator.rb'
         | 
| 10 10 |  | 
| 11 11 | 
             
            desc "Convert Geolocation DB to sqlite"
         | 
| 12 | 
            -
            task :dbip_to_sqlite3, [: | 
| 13 | 
            -
              filename = args[: | 
| 14 | 
            -
             | 
| 12 | 
            +
            task :dbip_to_sqlite3, [:year_month] do |tasks, args|
         | 
| 13 | 
            +
              filename = "./ip_locations/dbip-country-lite-#{args[:year_month]}.csv"
         | 
| 14 | 
            +
             | 
| 15 | 
            +
              if !File.exist? filename
         | 
| 16 | 
            +
                puts "Error. Could not find: #{filename}"
         | 
| 17 | 
            +
                puts
         | 
| 18 | 
            +
                puts 'I see the following files:'
         | 
| 19 | 
            +
                puts Dir.glob("ip_locations/dbip-country-lite*").map { |x| "- #{x}\n" }
         | 
| 20 | 
            +
                puts ''
         | 
| 21 | 
            +
                puts '1. Download (if necessary) a more recent version from: https://db-ip.com/db/download/ip-to-country-lite'
         | 
| 22 | 
            +
                puts '2. Save downloaded file to ip_locations/'
         | 
| 23 | 
            +
                puts '3. Relaunch with YYYY-MM'
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                exit
         | 
| 26 | 
            +
              else
         | 
| 27 | 
            +
                LogSense::IpLocator::dbip_to_sqlite filename
         | 
| 28 | 
            +
              end
         | 
| 15 29 | 
             
            end
         | 
    
        data/exe/log_sense
    CHANGED
    
    | @@ -7,21 +7,15 @@ require 'log_sense.rb' | |
| 7 7 | 
             
            #
         | 
| 8 8 |  | 
| 9 9 | 
             
            # this better be here... OptionsParser consumes ARGV
         | 
| 10 | 
            -
            @command_line = ARGV.join( | 
| 11 | 
            -
             | 
| 10 | 
            +
            @command_line = ARGV.join(' ')
         | 
| 12 11 | 
             
            @options     = LogSense::OptionsParser.parse ARGV
         | 
| 13 | 
            -
            @input_file  = @options[:input_file] || ARGV[0]
         | 
| 14 12 | 
             
            @output_file = @options[:output_file]
         | 
| 15 13 |  | 
| 16 | 
            -
            if  | 
| 17 | 
            -
              puts "Error:  | 
| 18 | 
            -
              exit
         | 
| 19 | 
            -
            end
         | 
| 20 | 
            -
             | 
| 21 | 
            -
            if not File.exist? @input_file
         | 
| 22 | 
            -
              puts "Error: input file '#{@input_file}' does not exist"
         | 
| 14 | 
            +
            if ARGV.map { |x| File.exist?(x) }.include?(false)
         | 
| 15 | 
            +
              $stderr.puts "Error: input file(s) '#{ARGV.reject { |x| File.exist(x) }.join(', ')}' do not exist"
         | 
| 23 16 | 
             
              exit 1
         | 
| 24 17 | 
             
            end
         | 
| 18 | 
            +
            @input_files = ARGV.empty? ? [$stdin] : ARGV.map { |x| File.open(x, 'r') }
         | 
| 25 19 |  | 
| 26 20 | 
             
            #
         | 
| 27 21 | 
             
            # Parse Log and Track Statistics
         | 
| @@ -36,32 +30,46 @@ when 'apache' | |
| 36 30 | 
             
            when 'rails'
         | 
| 37 31 | 
             
              parser_klass = LogSense::RailsLogParser
         | 
| 38 32 | 
             
              cruncher_klass = LogSense::RailsDataCruncher
         | 
| 33 | 
            +
            else
         | 
| 34 | 
            +
              $stderr.puts "Error: input format #{@options[:input_format]} not understood."
         | 
| 35 | 
            +
              exit 1
         | 
| 39 36 | 
             
            end
         | 
| 40 37 |  | 
| 41 | 
            -
             | 
| 38 | 
            +
            $stderr.puts "Parsing input files..." if @options[:verbose]
         | 
| 39 | 
            +
            @db = parser_klass.parse @input_files
         | 
| 42 40 |  | 
| 43 | 
            -
            if @options[:output_format] | 
| 44 | 
            -
               | 
| 41 | 
            +
            if @options[:output_format] == 'sqlite'
         | 
| 42 | 
            +
              $stderr.puts "Saving to SQLite3..." if @options[:verbose]
         | 
| 43 | 
            +
              ddb = SQLite3::Database.new(@output_file || 'db.sqlite3')
         | 
| 45 44 | 
             
              b = SQLite3::Backup.new(ddb, 'main', @db, 'main')
         | 
| 46 45 | 
             
              b.step(-1) #=> DONE
         | 
| 47 46 | 
             
              b.finish
         | 
| 48 47 | 
             
            else
         | 
| 48 | 
            +
              $stderr.puts "Aggregating data..." if @options[:verbose]
         | 
| 49 49 | 
             
              @data = cruncher_klass.crunch @db, @options
         | 
| 50 | 
            +
             | 
| 51 | 
            +
              $stderr.puts "Geolocating..." if @options[:verbose]
         | 
| 50 52 | 
             
              @data = LogSense::IpLocator.geolocate @data
         | 
| 51 53 |  | 
| 54 | 
            +
              $stderr.puts "Grouping by country..." if @options[:verbose]
         | 
| 55 | 
            +
              country_col = @data[:ips][0].size - 1
         | 
| 56 | 
            +
              @data[:countries] = @data[:ips].group_by { |x| x[country_col] }
         | 
| 57 | 
            +
             | 
| 52 58 | 
             
              @ended_at = Time.now
         | 
| 53 59 | 
             
              @duration = @ended_at - @started_at
         | 
| 54 60 |  | 
| 55 61 | 
             
              @data = @data.merge({
         | 
| 56 62 | 
             
                                    command: @command_line,
         | 
| 57 | 
            -
                                     | 
| 63 | 
            +
                                    filenames: ARGV,
         | 
| 64 | 
            +
                                    log_files: @input_files,
         | 
| 58 65 | 
             
                                    started_at: @started_at,
         | 
| 59 66 | 
             
                                    ended_at: @ended_at,
         | 
| 60 | 
            -
                                    duration: @duration
         | 
| 67 | 
            +
                                    duration: @duration,
         | 
| 68 | 
            +
                                    width: @options[:width]
         | 
| 61 69 | 
             
                                  })
         | 
| 62 | 
            -
             | 
| 63 70 | 
             
              #
         | 
| 64 71 | 
             
              # Emit Output
         | 
| 65 72 | 
             
              #
         | 
| 73 | 
            +
              $stderr.puts "Emitting..." if @options[:verbose]
         | 
| 66 74 | 
             
              puts LogSense::Emitter.emit @data, @options
         | 
| 67 75 | 
             
            end
         | 
| Binary file | 
| @@ -6,7 +6,7 @@ module LogSense | |
| 6 6 | 
             
                # @ variables are automatically put in the returned data
         | 
| 7 7 | 
             
                #
         | 
| 8 8 |  | 
| 9 | 
            -
                def self.crunch db, options = { limit:  | 
| 9 | 
            +
                def self.crunch db, options = { limit: 900 }
         | 
| 10 10 | 
             
                  first_day_s = db.execute "SELECT datetime from LogLine order by datetime limit 1"
         | 
| 11 11 | 
             
                  last_day_s  = db.execute "SELECT datetime from LogLine order by datetime desc limit 1"
         | 
| 12 12 |  | 
| @@ -15,17 +15,17 @@ module LogSense | |
| 15 15 | 
             
                  @last_day =  last_day_s&.first&.first ? Date.parse(last_day_s[0][0]) : nil
         | 
| 16 16 |  | 
| 17 17 | 
             
                  @total_days = 0
         | 
| 18 | 
            -
                  if @first_day  | 
| 19 | 
            -
             | 
| 20 | 
            -
                   | 
| 18 | 
            +
                  @total_days = (@last_day - @first_day).to_i if @first_day && @last_day
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  @source_files   = db.execute 'SELECT distinct(source_file) from LogLine'
         | 
| 21 21 |  | 
| 22 | 
            -
                  @log_size       = db.execute  | 
| 22 | 
            +
                  @log_size       = db.execute 'SELECT count(datetime) from LogLine'
         | 
| 23 23 | 
             
                  @log_size       = @log_size[0][0]
         | 
| 24 24 |  | 
| 25 25 | 
             
                  @selfpolls_size = db.execute "SELECT count(datetime) from LogLine where ip == '::1'"
         | 
| 26 26 | 
             
                  @selfpolls_size = @selfpolls_size[0][0]
         | 
| 27 27 |  | 
| 28 | 
            -
                  @crawlers_size  = db.execute  | 
| 28 | 
            +
                  @crawlers_size  = db.execute 'SELECT count(datetime) from LogLine where bot == 1'
         | 
| 29 29 | 
             
                  @crawlers_size = @crawlers_size[0][0]
         | 
| 30 30 |  | 
| 31 31 | 
             
                  @first_day_requested = options[:from_date]
         | 
| @@ -35,7 +35,7 @@ module LogSense | |
| 35 35 | 
             
                  @last_day_in_analysis = date_intersect options[:to_date], @last_day, :min
         | 
| 36 36 |  | 
| 37 37 | 
             
                  @total_days_in_analysis = 0
         | 
| 38 | 
            -
                  if @first_day_in_analysis  | 
| 38 | 
            +
                  if @first_day_in_analysis && @last_day_in_analysis
         | 
| 39 39 | 
             
                    @total_days_in_analysis = (@last_day_in_analysis - @first_day_in_analysis).to_i
         | 
| 40 40 | 
             
                  end
         | 
| 41 41 |  | 
| @@ -45,24 +45,24 @@ module LogSense | |
| 45 45 | 
             
                  filter = [
         | 
| 46 46 | 
             
                    (options[:from_date] ? "date(datetime) >= '#{options[:from_date]}'" : nil),
         | 
| 47 47 | 
             
                    (options[:to_date] ? "date(datetime) <= '#{options[:to_date]}'" : nil),
         | 
| 48 | 
            -
                    (options[:only_crawlers] ?  | 
| 49 | 
            -
                    (options[:ignore_crawlers] ?  | 
| 48 | 
            +
                    (options[:only_crawlers] ? 'bot == 1' : nil),
         | 
| 49 | 
            +
                    (options[:ignore_crawlers] ? 'bot == 0' : nil),
         | 
| 50 50 | 
             
                    (options[:no_selfpolls] ? "ip != '::1'" : nil),
         | 
| 51 | 
            -
                     | 
| 51 | 
            +
                    'true'
         | 
| 52 52 | 
             
                  ].compact.join " and "
         | 
| 53 53 |  | 
| 54 54 | 
             
                  mega = 1024 * 1024
         | 
| 55 55 | 
             
                  giga = mega * 1024
         | 
| 56 56 | 
             
                  tera = giga * 1024
         | 
| 57 | 
            -
             | 
| 57 | 
            +
             
         | 
| 58 58 | 
             
                  # in alternative to sum(size)
         | 
| 59 59 | 
             
                  human_readable_size = <<-EOS
         | 
| 60 | 
            -
                  CASE | 
| 60 | 
            +
                  CASE
         | 
| 61 61 | 
             
                  WHEN sum(size) <  1024 THEN sum(size) || ' B' 
         | 
| 62 62 | 
             
                  WHEN sum(size) >= 1024 AND sum(size) < (#{mega}) THEN ROUND((CAST(sum(size) AS REAL) / 1024), 2) || ' KB' 
         | 
| 63 63 | 
             
                  WHEN sum(size) >= (#{mega})  AND sum(size) < (#{giga}) THEN ROUND((CAST(sum(size) AS REAL) / (#{mega})), 2) || ' MB' 
         | 
| 64 64 | 
             
                  WHEN sum(size) >= (#{giga}) AND sum(size) < (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{giga})), 2) || ' GB' 
         | 
| 65 | 
            -
                  WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB' | 
| 65 | 
            +
                  WHEN sum(size) >= (#{tera}) THEN ROUND((CAST(sum(size) AS REAL) / (#{tera})), 2) || ' TB'
         | 
| 66 66 | 
             
                  END AS size
         | 
| 67 67 | 
             
                  EOS
         | 
| 68 68 |  | 
| @@ -89,16 +89,18 @@ module LogSense | |
| 89 89 |  | 
| 90 90 | 
             
                  @daily_distribution = db.execute "SELECT date(datetime), #{human_readable_day}, count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine  where #{filter} group by date(datetime)"
         | 
| 91 91 | 
             
                  @time_distribution = db.execute "SELECT strftime('%H', datetime), count(datetime), count(distinct(unique_visitor)), #{human_readable_size} from LogLine  where #{filter} group by strftime('%H', datetime)"
         | 
| 92 | 
            -
                  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 93 | 
            -
                  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size} from LogLine  where #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 94 | 
            -
                  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and extension == '.html' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 95 | 
            -
                  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 96 92 |  | 
| 97 | 
            -
                   | 
| 98 | 
            -
             | 
| 99 | 
            -
                   | 
| 93 | 
            +
                  good_statuses = "(status like '2%' or status like '3%')"
         | 
| 94 | 
            +
                  bad_statuses = "(status like '4%' or status like '5%')"
         | 
| 95 | 
            +
                  html_page = "(extension like '.htm%')"
         | 
| 96 | 
            +
                  non_html_page = "(extension not like '.htm%')"
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                  @most_requested_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 99 | 
            +
                  @most_requested_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), #{human_readable_size}, status from LogLine where #{good_statuses} and #{non_html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                  @missed_pages = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{html_page} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 102 | 
            +
                  @missed_resources = db.execute "SELECT path, count(path), count(distinct(unique_visitor)), status from LogLine where #{bad_statuses} and #{filter} group by path order by count(path) desc limit #{options[:limit]}"
         | 
| 100 103 |  | 
| 101 | 
            -
                  @attacks = db.execute "SELECT path, count(path), count(distinct(unique_visitor)) from LogLine where status == '404' and #{filter} and (#{@reasonable_requests_exts}) group by path order by count(path) desc  limit #{options[:limit]}"
         | 
| 102 104 | 
             
                  @statuses = db.execute "SELECT status, count(status) from LogLine where #{filter} group by status order by status"
         | 
| 103 105 |  | 
| 104 106 | 
             
                  @by_day_4xx = db.execute "SELECT date(datetime), count(datetime) from LogLine where substr(status, 1,1) == '4' and #{filter} group by date(datetime)"
         | 
| @@ -115,20 +117,19 @@ module LogSense | |
| 115 117 |  | 
| 116 118 | 
             
                  @ips = db.execute "SELECT ip, count(ip), count(distinct(unique_visitor)), #{human_readable_size} from LogLine where #{filter} group by ip order by count(ip) desc limit #{options[:limit]}"
         | 
| 117 119 |  | 
| 118 | 
            -
                  @streaks = db.execute  | 
| 120 | 
            +
                  @streaks = db.execute 'SELECT ip, substr(datetime, 1, 10), path from LogLine order by ip, datetime'
         | 
| 119 121 | 
             
                  data = {}
         | 
| 120 122 |  | 
| 121 | 
            -
                   | 
| 122 | 
            -
                    var_as_symbol = variable.to_s[1 | 
| 123 | 
            -
                    data[var_as_symbol] =  | 
| 123 | 
            +
                  instance_variables.each do |variable|
         | 
| 124 | 
            +
                    var_as_symbol = variable.to_s[1..].to_sym
         | 
| 125 | 
            +
                    data[var_as_symbol] = instance_variable_get(variable)
         | 
| 124 126 | 
             
                  end
         | 
| 127 | 
            +
             | 
| 125 128 | 
             
                  data
         | 
| 126 129 | 
             
                end
         | 
| 127 130 |  | 
| 128 | 
            -
                 | 
| 129 | 
            -
             | 
| 130 | 
            -
                def self.date_intersect date1, date2, method
         | 
| 131 | 
            -
                  if date1 and date2
         | 
| 131 | 
            +
                def self.date_intersect(date1, date2, method)
         | 
| 132 | 
            +
                  if date1 && date2
         | 
| 132 133 | 
             
                    [date1, date2].send(method)
         | 
| 133 134 | 
             
                  elsif date1
         | 
| 134 135 | 
             
                    date1
         | 
| @@ -138,4 +139,3 @@ module LogSense | |
| 138 139 | 
             
                end
         | 
| 139 140 | 
             
              end
         | 
| 140 141 | 
             
            end
         | 
| 141 | 
            -
             | 
| @@ -31,22 +31,21 @@ module LogSense | |
| 31 31 |  | 
| 32 32 | 
             
                TIMESTAMP = /(?<date>#{DAY}\/#{MONTH}\/#{YEAR}):(?<time>#{TIMEC}:#{TIMEC}:#{TIMEC} #{TIMEZONE})/
         | 
| 33 33 |  | 
| 34 | 
            -
                HTTP_METHODS | 
| 35 | 
            -
                WEBDAV_METHODS | 
| 36 | 
            -
                OTHER_METHODS | 
| 37 | 
            -
                METHOD | 
| 38 | 
            -
                PROTOCOL | 
| 39 | 
            -
                URL | 
| 40 | 
            -
                REFERER | 
| 41 | 
            -
                RETURN_CODE | 
| 42 | 
            -
                SIZE | 
| 43 | 
            -
             | 
| 44 | 
            -
                USER_AGENT = /(?<user_agent>[^"]+)/
         | 
| 34 | 
            +
                HTTP_METHODS = /GET|HEAD|POST|PUT|DELETE|CONNECT|OPTIONS|TRACE|PATCH/
         | 
| 35 | 
            +
                WEBDAV_METHODS = /COPY|LOCK|MKCOL|MOVE|PROPFIND|PROPPATCH|UNLOCK/
         | 
| 36 | 
            +
                OTHER_METHODS = /SEARCH|REPORT|PRI|HEAD\/robots.txt/
         | 
| 37 | 
            +
                METHOD = /(?<method>#{HTTP_METHODS}|#{WEBDAV_METHODS}|#{OTHER_METHODS})/
         | 
| 38 | 
            +
                PROTOCOL = /(?<protocol>HTTP\/[0-9]\.[0-9]|-|.*)/
         | 
| 39 | 
            +
                URL = /(?<url>[^ ]+)/
         | 
| 40 | 
            +
                REFERER = /(?<referer>[^"]*)/
         | 
| 41 | 
            +
                RETURN_CODE = /(?<status>[1-5][0-9][0-9])/
         | 
| 42 | 
            +
                SIZE = /(?<size>[0-9]+|-)/
         | 
| 43 | 
            +
                USER_AGENT = /(?<user_agent>[^"]*)/
         | 
| 45 44 |  | 
| 46 45 | 
             
                attr_reader :format
         | 
| 47 46 |  | 
| 48 | 
            -
                def initialize | 
| 49 | 
            -
                  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "#{METHOD} #{URL} #{PROTOCOL}" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
         | 
| 47 | 
            +
                def initialize
         | 
| 48 | 
            +
                  @format = /#{IP} #{IDENT} #{USERID} \[#{TIMESTAMP}\] "(#{METHOD} #{URL} #{PROTOCOL}|-|.+)" #{RETURN_CODE} #{SIZE} "#{REFERER}" "#{USER_AGENT}"/
         | 
| 50 49 | 
             
                end
         | 
| 51 50 |  | 
| 52 51 | 
             
                def parse line
         | 
| @@ -7,10 +7,9 @@ module LogSense | |
| 7 7 | 
             
                # parse an Apache log file and return a SQLite3 DB
         | 
| 8 8 | 
             
                #
         | 
| 9 9 |  | 
| 10 | 
            -
                def self.parse | 
| 11 | 
            -
                   | 
| 10 | 
            +
                def self.parse(streams, options = {})
         | 
| 11 | 
            +
                  db = SQLite3::Database.new ':memory:'
         | 
| 12 12 |  | 
| 13 | 
            -
                  db = SQLite3::Database.new ":memory:"
         | 
| 14 13 | 
             
                  db.execute "CREATE TABLE IF NOT EXISTS LogLine(
         | 
| 15 14 | 
             
                  id INTEGER PRIMARY KEY AUTOINCREMENT,
         | 
| 16 15 | 
             
                  datetime TEXT,
         | 
| @@ -28,15 +27,18 @@ module LogSense | |
| 28 27 | 
             
                  browser TEXT,
         | 
| 29 28 | 
             
                  browser_version TEXT,
         | 
| 30 29 | 
             
                  platform TEXT,
         | 
| 31 | 
            -
                  platform_version TEXT | 
| 30 | 
            +
                  platform_version TEXT,
         | 
| 31 | 
            +
                  source_file TEXT,
         | 
| 32 | 
            +
                  line_number INTEGER
         | 
| 33 | 
            +
                  )"
         | 
| 32 34 |  | 
| 33 | 
            -
                  ins = db.prepare( | 
| 34 | 
            -
                            datetime, | 
| 35 | 
            +
                  ins = db.prepare("insert into LogLine (
         | 
| 36 | 
            +
                            datetime,
         | 
| 35 37 | 
             
                            ip,
         | 
| 36 38 | 
             
                            user,
         | 
| 37 39 | 
             
                            unique_visitor,
         | 
| 38 40 | 
             
                            method,
         | 
| 39 | 
            -
                            path, | 
| 41 | 
            +
                            path,
         | 
| 40 42 | 
             
                            extension,
         | 
| 41 43 | 
             
                            status,
         | 
| 42 44 | 
             
                            size,
         | 
| @@ -46,44 +48,50 @@ module LogSense | |
| 46 48 | 
             
                            browser,
         | 
| 47 49 | 
             
                            browser_version,
         | 
| 48 50 | 
             
                            platform,
         | 
| 49 | 
            -
                            platform_version | 
| 50 | 
            -
             | 
| 51 | 
            +
                            platform_version,
         | 
| 52 | 
            +
                            source_file,
         | 
| 53 | 
            +
                            line_number
         | 
| 54 | 
            +
                            )
         | 
| 55 | 
            +
                          values (#{Array.new(18, '?').join(', ')})")
         | 
| 51 56 |  | 
| 52 57 | 
             
                  parser = ApacheLogLineParser.new
         | 
| 53 | 
            -
             | 
| 54 | 
            -
                   | 
| 55 | 
            -
                     | 
| 56 | 
            -
                       | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
                         | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 64 | 
            -
             | 
| 65 | 
            -
             | 
| 66 | 
            -
             | 
| 67 | 
            -
             | 
| 68 | 
            -
             | 
| 69 | 
            -
             | 
| 70 | 
            -
             | 
| 71 | 
            -
             | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 75 | 
            -
             | 
| 76 | 
            -
             | 
| 77 | 
            -
             | 
| 58 | 
            +
             | 
| 59 | 
            +
                  streams.each do |stream|
         | 
| 60 | 
            +
                    stream.readlines.each_with_index do |line, line_number|
         | 
| 61 | 
            +
                      begin
         | 
| 62 | 
            +
                        hash = parser.parse line
         | 
| 63 | 
            +
                        ua = Browser.new(hash[:user_agent], accept_language: 'en-us')
         | 
| 64 | 
            +
                        ins.execute(
         | 
| 65 | 
            +
                          DateTime.parse("#{hash[:date]}T#{hash[:time]}").iso8601,
         | 
| 66 | 
            +
                          hash[:ip],
         | 
| 67 | 
            +
                          hash[:userid],
         | 
| 68 | 
            +
                          unique_visitor_id(hash),
         | 
| 69 | 
            +
                          hash[:method],
         | 
| 70 | 
            +
                          hash[:url],
         | 
| 71 | 
            +
                          (hash[:url] ? File.extname(hash[:url]) : ''),
         | 
| 72 | 
            +
                          hash[:status],
         | 
| 73 | 
            +
                          hash[:size].to_i,
         | 
| 74 | 
            +
                          hash[:referer],
         | 
| 75 | 
            +
                          hash[:user_agent],
         | 
| 76 | 
            +
                          ua.bot? ? 1 : 0,
         | 
| 77 | 
            +
                          (ua.name || ''),
         | 
| 78 | 
            +
                          (ua.version || ''),
         | 
| 79 | 
            +
                          (ua.platform.name || ''),
         | 
| 80 | 
            +
                          (ua.platform.version || ''),
         | 
| 81 | 
            +
                          stream == $stdin ? "stdin" : stream.path,
         | 
| 82 | 
            +
                          line_number
         | 
| 83 | 
            +
                        )
         | 
| 84 | 
            +
                      rescue StandardError => e
         | 
| 85 | 
            +
                        $stderr.puts e.message
         | 
| 86 | 
            +
                      end
         | 
| 78 87 | 
             
                    end
         | 
| 79 88 | 
             
                  end
         | 
| 80 | 
            -
             | 
| 89 | 
            +
             | 
| 81 90 | 
             
                  db
         | 
| 82 91 | 
             
                end
         | 
| 83 92 |  | 
| 84 93 | 
             
                def self.unique_visitor_id hash
         | 
| 85 94 | 
             
                  "#{hash[:date]} #{hash[:ip]} #{hash[:user_agent]}"
         | 
| 86 95 | 
             
                end
         | 
| 87 | 
            -
             | 
| 88 96 | 
             
              end
         | 
| 89 97 | 
             
            end
         |