logeater 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ce5032b80ebdd86abf84a53642d881c1e61315ac
4
+ data.tar.gz: 215a2ba9e8e0d90dd14daa6a7df4baa7c2295fe4
5
+ SHA512:
6
+ metadata.gz: f0a033a7a9e51498b21728cca217978dcd1b1145cddf2a704386558496e01e4949d1e91c0bd20bd411f6540d46ad9edd929adfc25dff2f3e484c365a0672164e
7
+ data.tar.gz: bd808f7f9d91d28b09b5ccc0f2e552319edf0f3f2e9e1b64495fe2b71ac7e906acbf7daa74d37ebb333823a31604877d6e10f968c1dba17f0e194cfd14f25ea6
data/.gitignore ADDED
@@ -0,0 +1,14 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ *.bundle
11
+ *.so
12
+ *.o
13
+ *.a
14
+ mkmf.log
data/.irbrc ADDED
@@ -0,0 +1,2 @@
1
+ $:.push File.join(Dir.pwd, "lib"); require "logeater"
2
+
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "https://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in logeater.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2015 Bob Lail
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # Logeater
2
+
3
+ Parses log files and imports them into a database
4
+
5
+ ### Features
6
+
7
+ - Can read from plain text or gzipped log files
8
+ - [Parses parameters safely](https://github.com/concordia-publishing-house/logeater/blob/master/lib/logeater/params_parser.rb)
9
+ - Can import a batch of files at once
10
+ - Parses [these attributes](https://github.com/concordia-publishing-house/logeater/blob/master/db/schema.rb#L19-L32) of requests
11
+
12
+
13
+ ### Usage
14
+
15
+ Clone the gem
16
+
17
+ git clone git@github.com:concordia-publishing-house/logeater.git
18
+ bundle
19
+
20
+ Create the development database
21
+
22
+ bundle exec rake db:create db:migrate
23
+
24
+ Install the gem
25
+
26
+ bundle exec rake install
27
+
28
+ Import log files
29
+
30
+ logeater my_app ~/Desktop/logs/*.gz
31
+
32
+
33
+ ### To Do
34
+
35
+ - Set up databases without cloning the gem?
36
+ - Import to a [Heroku Postgres database](https://dashboard.heroku.com/apps/logs-production)?
37
+ - Parse other kinds of logs?
38
+ - Collect other data from Rails logs?
39
+
40
+
41
+ ### Contributing
42
+
43
+ 1. Fork it ( https://github.com/[my-github-username]/logeater/fork )
44
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
45
+ 3. Commit your changes (`git commit -am "Add some feature"`)
46
+ 4. Push to the branch (`git push origin my-new-feature`)
47
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ require "standalone_migrations"
4
+ StandaloneMigrations::Tasks.load_tasks
5
+
6
+ require "rake/testtask"
7
+ Rake::TestTask.new(:test) do |t|
8
+ t.libs << "lib"
9
+ t.libs << "test"
10
+ t.pattern = "test/**/*_test.rb"
11
+ t.verbose = false
12
+ end
data/bin/logeater ADDED
@@ -0,0 +1,39 @@
1
#!/usr/bin/env ruby

# Command-line entry point.
#
#   logeater <app> <files...>
#
# For every file given: removes the entries previously imported for that
# app/logfile pair, imports the file, and prints how many requests were added
# and how long it took. Finishes by printing the total elapsed time.

require "logeater"

app = ARGV.shift
files = ARGV

# A missing app name or an empty file list is a usage error: `abort` writes the
# message to stderr and exits with a non-zero status so callers can detect it.
abort "USAGE: logeater <app> <files...>" if app.nil? || files.empty?

started_all = Time.now
files.each_with_index do |file, i|
  reader = Logeater::Reader.new(app, file, progress: true)
  reader.remove_existing_entries!

  # The number of imported requests is measured as the change in row count.
  started_count = Logeater::Request.count
  started_at = Time.now
  reader.import
  finished_at = Time.now
  finished_count = Logeater::Request.count

  puts " > \e[34mImported \e[1m%d\e[0;34m requests in \e[1m%.2f\e[0;34m seconds (%d of %d)\e[0m\n\n" % [
    finished_count - started_count,
    finished_at - started_at,
    i + 1,
    files.length ]
end

# Split the elapsed seconds into whole minutes and the remaining seconds.
minutes, seconds = (Time.now - started_all).divmod(60)
puts "Total time %d minutes, %.2f seconds" % [minutes, seconds]
data/db/config.yml ADDED
@@ -0,0 +1,13 @@
1
+ development:
2
+ adapter: postgresql
3
+ encoding: utf8
4
+ database: logs_development
5
+ host: localhost
6
+ min_messages: WARNING
7
+
8
+ test:
9
+ adapter: postgresql
10
+ encoding: utf8
11
+ database: logs_test
12
+ host: localhost
13
+ min_messages: WARNING
@@ -0,0 +1,31 @@
1
# Migration that creates the `requests` table and its indexes.
# `app`, `logfile` and `uuid` are required (null: false); every other column
# is nullable. `uuid` additionally gets a unique index.
+ class CreateRequests < ActiveRecord::Migration
2
+ def change
3
+ create_table :requests do |t|
4
+ t.string :app, null: false
5
+ t.string :logfile, null: false
6
+
7
+ t.string :uuid, null: false
8
+ t.string :subdomain
9
+ t.timestamp :started_at
10
+ t.timestamp :completed_at
11
+ t.integer :duration
12
+ t.string :http_method
13
+ t.string :path
14
+ t.text :params
15
+ t.string :controller
16
+ t.string :action
17
+ t.string :remote_ip
18
+ t.string :format
19
+ t.integer :http_status
20
+ t.string :http_response
21
+
22
+ t.timestamps
23
+ end
24
+
# Indexes on the columns used to look requests up; only `uuid` is unique.
25
+ add_index :requests, :app
26
+ add_index :requests, :logfile
27
+ add_index :requests, :uuid, unique: true
28
+ add_index :requests, [:controller, :action]
29
+ add_index :requests, :http_status
30
+ end
31
+ end
@@ -0,0 +1,9 @@
1
# Migration that changes `requests.params` from text to json.
+ class ChangeRequestsParamsToJson < ActiveRecord::Migration
2
+ def up
3
# Raw SQL is used so existing values can be cast with `using params::json`.
+ execute "alter table requests alter column params type json using params::json"
4
+ end
5
+
6
+ def down
7
# Reverts the column to the text type it had before this migration.
+ change_column :requests, :params, :text
8
+ end
9
+ end
data/db/schema.rb ADDED
@@ -0,0 +1,43 @@
1
+ # encoding: UTF-8
2
+ # This file is auto-generated from the current state of the database. Instead
3
+ # of editing this file, please use the migrations feature of Active Record to
4
+ # incrementally modify your database, and then regenerate this schema definition.
5
+ #
6
+ # Note that this schema.rb definition is the authoritative source for your
7
+ # database schema. If you need to create the application database on another
8
+ # system, you should be using db:schema:load, not running all the migrations
9
+ # from scratch. The latter is a flawed and unsustainable approach (the more migrations
10
+ # you'll amass, the slower it'll run and the greater likelihood for issues).
11
+ #
12
+ # It's strongly recommended to check this file into your version control system.
13
+
14
+ ActiveRecord::Schema.define(:version => 20150122021627) do
15
+
16
+ create_table "requests", :force => true do |t|
17
+ t.string "app", :null => false
18
+ t.string "logfile", :null => false
19
+ t.string "uuid", :null => false
20
+ t.string "subdomain"
21
+ t.datetime "started_at"
22
+ t.datetime "completed_at"
23
+ t.integer "duration"
24
+ t.string "http_method"
25
+ t.string "path"
26
+ t.json "params"
27
+ t.string "controller"
28
+ t.string "action"
29
+ t.string "remote_ip"
30
+ t.string "format"
31
+ t.integer "http_status"
32
+ t.string "http_response"
33
+ t.datetime "created_at", :null => false
34
+ t.datetime "updated_at", :null => false
35
+ end
36
+
37
+ add_index "requests", ["app"], :name => "index_requests_on_app"
38
+ add_index "requests", ["controller", "action"], :name => "index_requests_on_controller_and_action"
39
+ add_index "requests", ["http_status"], :name => "index_requests_on_http_status"
40
+ add_index "requests", ["logfile"], :name => "index_requests_on_logfile"
41
+ add_index "requests", ["uuid"], :name => "index_requests_on_uuid", :unique => true
42
+
43
+ end
@@ -0,0 +1,76 @@
1
+ require "ripper"
2
+
3
module Logeater
  # Converts the text of a Ruby hash literal (as captured from a
  # "Parameters: {...}" log message) into plain Ruby data.
  #
  # The text is never evaluated: it is tokenized with Ripper and only a small
  # set of literal node types is translated (strings, integers, floats,
  # negative numbers, true/false/nil, arrays and hashes). Anything else raises
  # Parser::ParserNotImplemented.
  class ParamsParser
    # Matches an object inspection such as `#<Some::Class:0x007f... @a=1>`
    # that contains no nested `<`/`>`; the class name is captured.
    INSPECTED_OBJECT = /\#<((?:[\w]|::)+):[^<>]+>/

    attr_reader :params

    # @param params [String] the hash-literal text to parse
    def initialize(params)
      @params = params
    end

    # Parses the text given to the constructor.
    #
    # @return [Hash] the parsed parameters
    # @raise [Parser::MalformedParameters] when the text is not valid Ruby
    # @raise [Parser::ParserNotImplemented] when it contains an unsupported construct
    def parse!
      identify tokenize_hash(clean(params))
    end

    # Replaces every inspected object (`#<Class:...>`) with its class name as a
    # double-quoted string so that the text becomes parseable Ruby. The
    # substitution is repeated until nothing changes because inspections can
    # be nested and only the innermost ones match on each pass.
    #
    # @param params [String]
    # @return [String]
    def clean(params)
      loop do
        replaced = params.gsub(INSPECTED_OBJECT) { "\"#{Regexp.last_match(1)}\"" }
        return replaced if replaced == params
        params = replaced
      end
    end

    private

    # Returns the S-expression of the first statement in `ruby`.
    def tokenize_hash(ruby)
      sexp = Ripper.sexp(ruby)
      raise Parser::MalformedParameters.new(ruby) unless sexp

      # [:program, [[:hash, ... ]]]
      sexp[1][0]
    end

    # Recursively converts a Ripper S-expression into the value it denotes.
    def identify(sexp)
      case sexp[0]

      # [:string_literal, [:string_content, [:@tstring_content, "utf8", [1, 2]]]]
      # [:string_literal, [:string_content]]
      when :string_literal then sexp[1][1] ? sexp[1][1][1] : ""

      # [:@int, "10", [1, 14]]
      when :@int then sexp[1].to_i

      # [:@float, "10.56", [1, 14]]
      when :@float then sexp[1].to_f

      # [:unary, :-@, [:@int, "1", [1, 9]]]
      when :unary then identify_negative_number(sexp)

      # [:var_ref, [:@kw, "true", [1, 12]]]
      when :var_ref then identify_keyword(sexp[1])

      # [:array, [[:@int, "1", [1, 9]], [:@int, "4", [1, 12]]]]
      # [:array, nil]
      when :array then sexp[1] ? sexp[1].map { |element| identify(element) } : []

      # [:hash,
      #   [:assoclist_from_args,
      #     [[:assoc_new,
      #       [:string_literal, [:string_content, [:@tstring_content, "utf8", [1, 2]]]],
      #       [:string_literal, [:string_content, [:@tstring_content, "✓", [1, 12]]]]]]]]]
      # [:hash, nil]
      when :hash
        return {} unless sexp[1]
        sexp[1][1].each_with_object({}) do |(_, key, value), hash|
          hash[identify(key)] = identify(value)
        end

      else
        raise Parser::ParserNotImplemented, "I don't know how to identify #{sexp.inspect}"
      end
    rescue Parser::Error
      # Already a parser error (possibly raised by a nested call): pass it on
      # untouched instead of wrapping it again at every level of recursion.
      raise
    rescue StandardError => error
      raise Parser::ParserNotImplemented, "An exception occurred when parsing #{sexp.inspect}\n#{error.class.name}: #{error.message}"
    end

    # Handles `-<number>`; any other unary expression is unsupported.
    def identify_negative_number(sexp)
      operand = identify(sexp[2]) if sexp[1] == :-@
      return -operand if operand.is_a?(Numeric)
      raise Parser::ParserNotImplemented, "I don't know how to identify #{sexp.inspect}"
    end

    # Maps the keywords true, false and nil to their values.
    def identify_keyword(token)
      case token[1]
      when "true" then true
      when "false" then false
      when "nil" then nil
      else raise Parser::ParserNotImplemented, "Unknown variable: #{token.inspect}"
      end
    end

  end
end
@@ -0,0 +1,161 @@
1
+ require "addressable/uri"
2
+ require "active_support/inflector"
3
+ require "logeater/params_parser"
4
+ require "logeater/parser_errors"
5
+
6
module Logeater
  # Parses single log lines into hashes of attributes.
  #
  # Every line must match LINE_MATCHER. When the message part additionally
  # carries a "[subdomain] [uuid]" prefix it is classified as one of
  # :request_started, :request_controller, :request_params,
  # :request_completed or :request_line and the matching attributes are
  # extracted from it.
  class Parser

    # "<letter>, [<timestamp> <anything>] <LEVEL> -- : <message>"
    LINE_MATCHER = /^
      [A-Z],\s
      \[(?<timestamp>[^\s\]]+)(?:\s[^\]]*)?\]\s+
      (?<log_level>[A-Z]+)\s+\-\-\s:\s+
      (?<message>.*)
    $/x.freeze

    # "YYYY-MM-DDTHH:MM:SS" with an optional fractional part on the seconds.
    TIMESTAMP_MATCHER = /
      (?<year>\d\d\d\d)\-
      (?<month>\d\d)\-
      (?<day>\d\d)T
      (?<hours>\d\d):
      (?<minutes>\d\d):
      (?<seconds>\d\d(?:\.\d+)?)
    /x.freeze

    HTTP_VERBS = %w{DELETE GET HEAD OPTIONS PATCH POST PUT}.freeze

    # Built once rather than on every call to identify_request_line_type.
    REQUEST_STARTED_PREFIX = /^Started (#{HTTP_VERBS.join("|")})/.freeze

    REQUEST_LINE_MATCHER = /^
      \[(?<subdomain>[^\]]+)\]\s
      \[(?<uuid>[\w\-]{36})\]\s+
      (?<message>.*)
    $/x.freeze

    # remote_ip accepts digits, hex digits, dots and colons so that IPv6
    # addresses (e.g. "::1") are matched as well as IPv4 ones.
    REQUEST_STARTED_MATCHER = /^
      Started\s
      (?<http_method>[A-Z]+)\s
      "(?<path>[^"]+)"\sfor\s
      (?<remote_ip>[\h:.]+)
    /x.freeze

    REQUEST_CONTROLLER_MATCHER = /^
      Processing\sby\s
      (?<controller>[A-Za-z0-9:]+)\#
      (?<action>[a-z_0-9]+)\sas\s
      (?<format>.*)
    /x.freeze

    REQUEST_PARAMETERS_MATCHER = /^
      Parameters:\s
      (?<params>\{.*\})
    $/x.freeze

    REQUEST_COMPLETED_MATCHER = /^
      Completed\s
      (?<http_status>\d\d\d)\s
      (?<http_response>.*)\sin\s
      (?<duration>[\d\.]+)(?<units>ms)\b
    /x.freeze # optional: (Views: 0.1ms | ActiveRecord: 50.0ms)

    # Parses one log line.
    #
    # @param line [String]
    # @return [Hash] always has :type, :timestamp (Time), :log_level and
    #   :message; request lines add :subdomain, :uuid and type-specific keys
    # @raise [UnmatchedLine] when the line does not match LINE_MATCHER
    # @raise [MalformedTimestamp] when its timestamp does not match TIMESTAMP_MATCHER
    def parse!(line)
      match = line.match LINE_MATCHER
      raise UnmatchedLine.new(line) unless match

      message = match["message"]

      result = {
        type: :generic,
        timestamp: parse_timestamp(match["timestamp"]),
        log_level: match["log_level"],
        message: message }

      result.merge(parse_message(message))
    end

    # Extracts request attributes from a message carrying a
    # "[subdomain] [uuid]" prefix; returns {} for any other message.
    def parse_message(message)
      match = message.match REQUEST_LINE_MATCHER
      return {} unless match

      message = match["message"]
      type = identify_request_line_type(message)

      { subdomain: match["subdomain"],
        uuid: match["uuid"],
        type: type,
        message: message }.merge(
          custom_attributes_for(type, message))
    end

    # Classifies a request message by how it begins.
    def identify_request_line_type(message)
      return :request_started if message =~ REQUEST_STARTED_PREFIX
      return :request_controller if message.start_with? "Processing by "
      return :request_params if message.start_with? "Parameters: "
      return :request_completed if message =~ /^Completed \d\d\d/
      :request_line
    end

    # Dispatches to parse_<type>_message. When that parser returns nothing the
    # failure is logged and {} is returned instead.
    def custom_attributes_for(type, message)
      attributes = send :"parse_#{type}_message", message
      unless attributes
        log "Unable to parse message identified as #{type.inspect}: #{message.inspect}"
        return {}
      end
      attributes
    end

    def parse_request_line_message(message)
      {}
    end

    # @return [Hash, nil] :http_method, :path (without the query string) and
    #   :remote_ip, or nil when the message does not match
    def parse_request_started_message(message)
      match = message.match(REQUEST_STARTED_MATCHER)
      return unless match
      uri = Addressable::URI.parse(match["path"])

      { http_method: match["http_method"],
        path: uri.path,
        remote_ip: match["remote_ip"] }
    end

    # @return [Hash, nil] :controller (underscored, without the "_controller"
    #   suffix), :action and :format, or nil when the message does not match
    def parse_request_controller_message(message)
      match = message.match(REQUEST_CONTROLLER_MATCHER)
      return unless match

      { controller: match["controller"].underscore.gsub(/_controller$/, ""),
        action: match["action"],
        format: match["format"] }
    end

    # @return [Hash, nil] :params as parsed data, or as the raw text when it
    #   is malformed; nil when the message does not match
    def parse_request_params_message(message)
      match = message.match(REQUEST_PARAMETERS_MATCHER)
      return unless match
      params = ParamsParser.new(match["params"])

      { params: params.parse! }
    rescue Logeater::Parser::MalformedParameters
      log "Unable to parse parameters: #{match["params"].inspect}"
      { params: match["params"] }
    end

    # @return [Hash, nil] :http_status, :http_response and :duration (the
    #   logged milliseconds truncated to an Integer), or nil when the message
    #   does not match
    def parse_request_completed_message(message)
      match = message.match(REQUEST_COMPLETED_MATCHER)
      return unless match

      { http_status: match["http_status"].to_i,
        http_response: match["http_response"],
        duration: match["duration"].to_i }
    end

    # Writes a warning to stderr, colored yellow.
    def log(statement)
      $stderr.puts "\e[33m#{statement}\e[0m"
    end

    private

    # Builds a Time (in the local time zone) from a timestamp string.
    def parse_timestamp(timestamp)
      time = timestamp.match TIMESTAMP_MATCHER
      raise MalformedTimestamp.new(timestamp) unless time

      date_and_clock = time.captures[0...-1].map(&:to_i)
      # String#to_r keeps the fractional seconds exact and needs no extra
      # library (BigDecimal.new no longer exists in current bigdecimal).
      Time.new(*date_and_clock, time["seconds"].to_r)
    end

  end
end
@@ -0,0 +1,34 @@
1
module Logeater
  class Parser

    # Base class of every error raised while parsing. The exception message
    # is "<message>: <input.inspect>"; the original input stays available
    # through #input so that rescuers can inspect or re-use it.
    class Error < ::ArgumentError
      # @return [Object] the input that could not be parsed
      attr_reader :input

      # @param message [String] short description of the failure
      # @param input [Object] the offending input
      def initialize(message, input)
        @input = input
        super "#{message}: #{input.inspect}"
      end
    end

    # The input was not recognized as a log line.
    class UnmatchedLine < Error
      def initialize(input)
        super "Unmatched line", input
      end
    end

    # The timestamp of a log line was not in the expected format.
    class MalformedTimestamp < Error
      def initialize(input)
        super "Malformed timestamp", input
      end
    end

    # The logged parameters could not be tokenized.
    class MalformedParameters < Error
      def initialize(input)
        super "Malformed parameters", input
      end
    end

    # The input contained a construct that the parser does not support.
    class ParserNotImplemented < Error
      def initialize(input)
        super "Unable to parse", input
      end
    end
  end
end
@@ -0,0 +1,117 @@
1
+ require "logeater/request"
2
+ require "zlib"
3
+ require "ruby-progressbar"
4
+
5
module Logeater
  # Reads one log file (plain text, or gzipped when its name ends in ".gz"),
  # assembles the lines belonging to each request and saves completed
  # requests in batches.
  class Reader
    attr_reader :app, :path, :filename, :batch_size

    # @param app [String] name of the application the log belongs to
    # @param path [String] path of the log file
    # @param options [Hash]
    #   :progress   - show a progress bar while reading (default: false)
    #   :batch_size - completed requests buffered before saving (default: 500)
    #   :verbose    - also report unmatched lines on stderr (default: false)
    def initialize(app, path, options={})
      @app = app
      @path = path
      @filename = File.basename(path)
      @parser = Logeater::Parser.new
      @show_progress = options.fetch :progress, false
      @batch_size = options.fetch :batch_size, 500
      @verbose = options.fetch :verbose, false
      @requests = {}
      @completed_requests = []
    end

    # Deletes what was previously imported from this file, then imports it.
    def reimport
      remove_existing_entries!
      import
    end

    # Processes every line of the file and saves the final, partial batch.
    def import
      each_line(&method(:process_line!))
      save!
    end

    # Deletes the requests recorded for this app and file name.
    def remove_existing_entries!
      Logeater::Request.where(app: app, logfile: filename).delete_all
    end

    def show_progress?
      @show_progress
    end

    def verbose?
      @verbose
    end

    # Yields each line of the file, decompressing it when the file name ends
    # in ".gz". Without a block, returns an Enumerator over the lines.
    def each_line
      return enum_for(:each_line) unless block_given?

      File.open(path) do |file|
        gzipped = File.extname(path) == ".gz"
        io = gzipped ? Zlib::GzipReader.new(file) : file
        pbar = ProgressBar.create(title: filename, total: file.size, autofinish: false) if show_progress?
        begin
          io.each_line do |line|
            yield line
            # Progress follows the position in the file on disk (the
            # compressed size for gzipped files), matching `total` above.
            pbar.progress = file.pos if pbar
          end
        ensure
          # Release the inflate stream; File.open closes the file itself.
          io.finish if gzipped
        end
        pbar.finish if pbar
      end
    end
    alias :scan :each_line

    private
    attr_reader :parser, :requests, :completed_requests

    # Parses one line and folds it into the request it belongs to.
    # Parser errors are reported on stderr and the line is skipped.
    def process_line!(line)
      attributes = parser.parse!(line)

      return if [:generic, :request_line].member? attributes[:type]

      # A "Started" line opens a new request, keyed by its UUID.
      if attributes[:type] == :request_started
        requests[attributes[:uuid]] = attributes
          .slice(:uuid, :subdomain, :http_method, :path, :remote_ip)
          .merge(started_at: attributes[:timestamp], logfile: filename, app: app)
        return
      end

      request_attributes = requests[attributes[:uuid]]
      unless request_attributes
        log "Attempting to record #{attributes[:type].inspect}; but there is no request started with UUID #{attributes[:uuid].inspect}"
        return
      end

      case attributes[:type]
      when :request_controller
        request_attributes.merge! attributes.slice(:controller, :action, :format)

      when :request_params
        request_attributes.merge! attributes.slice(:params)

      when :request_completed
        request_attributes.merge! attributes
          .slice(:http_status, :http_response, :duration)
          .merge(completed_at: attributes[:timestamp])

        # The request is complete: queue it for saving and stop tracking it.
        completed_requests.push Logeater::Request.new(request_attributes)
        requests.delete attributes[:uuid]

        save! if completed_requests.length >= batch_size
      end

    rescue Logeater::Parser::UnmatchedLine
      $stderr.puts "\e[90m#{$!.message}\e[0m" if verbose?
    rescue Logeater::Parser::Error
      log $!.message
    end

    # Saves the buffered requests with a single import and empties the buffer.
    def save!
      return if completed_requests.empty?
      Logeater::Request.import(completed_requests)
      completed_requests.clear
    end

    # Writes a warning to stderr, colored yellow.
    def log(statement)
      $stderr.puts "\e[33m#{statement}\e[0m"
    end

  end
end
@@ -0,0 +1,14 @@
1
+ require "active_record"
2
+ require "activerecord-import"
3
+ require "activerecord-postgres-json"
4
+
5
# Active Record model stored in the "requests" table.
+ module Logeater
6
+ class Request < ActiveRecord::Base
7
+ self.table_name = "requests"
8
+
9
# `params` is (de)serialized with the JSON coder.
+ serialize :params, ActiveRecord::Coders::JSON
10
+
11
+
12
+
13
+ end
14
+ end
@@ -0,0 +1,3 @@
1
# Version string of the logeater gem.
+ module Logeater
2
+ VERSION = "0.1.1"
3
+ end
data/lib/logeater.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "logeater/parser"
2
+ require "logeater/reader"
3
+ require "logeater/request"
4
+ require "logeater/version"
5
+ require "yaml"
6
+ require "erb"
7
+
8
# Runs when the library is required: reads db/config.yml (located relative to
# this file), evaluates it as an ERB template, and connects Active Record
# using the section named by RAILS_ENV ("development" when it is not set).
+ config_file = File.expand_path("../../db/config.yml", __FILE__)
9
+ config = YAML.load(ERB.new(File.read(config_file)).result).with_indifferent_access
10
+ ActiveRecord::Base.establish_connection config[ENV["RAILS_ENV"] || "development"]
11
+
12
+ module Logeater
13
+ end