active_record-sql_analyzer 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +15 -0
  5. data/CONTRIBUTING.md +9 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.md +202 -0
  8. data/README.md +119 -0
  9. data/Rakefile +6 -0
  10. data/active_record-sql_analyzer.gemspec +21 -0
  11. data/bin/ar-log-analyzer +10 -0
  12. data/lib/active_record/sql_analyzer.rb +40 -0
  13. data/lib/active_record/sql_analyzer/analyzer.rb +45 -0
  14. data/lib/active_record/sql_analyzer/background_processor.rb +54 -0
  15. data/lib/active_record/sql_analyzer/backtrace_filter.rb +38 -0
  16. data/lib/active_record/sql_analyzer/cli.rb +74 -0
  17. data/lib/active_record/sql_analyzer/cli_processor.rb +122 -0
  18. data/lib/active_record/sql_analyzer/compact_logger.rb +31 -0
  19. data/lib/active_record/sql_analyzer/configuration.rb +174 -0
  20. data/lib/active_record/sql_analyzer/logger.rb +29 -0
  21. data/lib/active_record/sql_analyzer/monkeypatches/query.rb +35 -0
  22. data/lib/active_record/sql_analyzer/monkeypatches/tagger.rb +24 -0
  23. data/lib/active_record/sql_analyzer/redacted_logger.rb +22 -0
  24. data/lib/active_record/sql_analyzer/redactor.rb +5 -0
  25. data/lib/active_record/sql_analyzer/version.rb +5 -0
  26. data/spec/active_record/sql_analyzer/analyzer_spec.rb +33 -0
  27. data/spec/active_record/sql_analyzer/background_processor_spec.rb +44 -0
  28. data/spec/active_record/sql_analyzer/backtrace_filter_spec.rb +28 -0
  29. data/spec/active_record/sql_analyzer/cli_processor_spec.rb +120 -0
  30. data/spec/active_record/sql_analyzer/cli_spec.rb +66 -0
  31. data/spec/active_record/sql_analyzer/end_to_end_spec.rb +121 -0
  32. data/spec/active_record/sql_analyzer/redacted_logger_spec.rb +67 -0
  33. data/spec/spec_helper.rb +34 -0
  34. data/spec/support/db_connection.rb +65 -0
  35. data/spec/support/stub_logger.rb +9 -0
  36. data/spec/support/stub_rails.rb +9 -0
  37. data/spec/support/wait_for_pop.rb +13 -0
  38. metadata +129 -0
@@ -0,0 +1,45 @@
module ActiveRecord
  module SqlAnalyzer
    # Settings for a single analyzer: the tables it watches, the name its
    # output is tagged with, and the logger class used to record matches.
    class Analyzer
      attr_reader :options

      def initialize
        @options = {}
      end

      # Read a single option by key (e.g. :table_regex, :name, :logger).
      def [](key)
        @options[key]
      end

      # Tables to watch for this analyzer.
      #
      # Builds a case-insensitive regex that matches SELECT/DELETE/INSERT/UPDATE
      # statements touching any of the given tables and stores it under
      # :table_regex.
      #
      # Raises ArgumentError unless +names+ is an Array.
      def tables(names)
        unless names.is_a?(Array)
          raise ArgumentError, "Names of tables must be an array"
        end

        # Escape each name so regex metacharacters in a table name (for
        # example the dot in "schema.table") are matched literally.
        alternatives = names.map { |table| Regexp.escape(table.to_s) }.join("|")

        @options[:table_regex] = /\A\s*((SELECT|DELETE).*(FROM|JOIN)|(INSERT\s+INTO|UPDATE))\s+`?(#{alternatives})`?/i
      end

      # Logger class to use for recording data
      def logger(klass)
        @options[:logger] = klass
      end

      # How to tag the data.
      #
      # Raises ArgumentError unless +name+ is a String (or Symbol) made up
      # solely of [a-z0-9A-Z_] characters.
      def name(name)
        # Both conditions must hold: the right type AND the right characters.
        # Symbols are still accepted because earlier versions let them through.
        acceptable_type = name.is_a?(String) || name.is_a?(Symbol)

        unless acceptable_type && name.to_s =~ /\A([a-z0-9A-Z_]+)\z/
          raise ArgumentError, "Name for this analyzer can only contain [a-z0-9A-Z_] characters"
        end

        @options[:name] = name
      end

      # Instantiates the logger once (falling back to RedactedLogger when no
      # logger class was configured) with the configured log root and this
      # analyzer's name, and memoizes it under :logger_instance.
      def setup
        @options[:logger_instance] ||= (@options[:logger] || RedactedLogger).new(
          SqlAnalyzer.config[:logger_root_path],
          @options[:name]
        )
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ require "set"
2
+
module ActiveRecord
  module SqlAnalyzer
    # Queues events and processes them on a lazily started worker thread so
    # that filtering, redaction and logging happen off the caller's thread.
    class BackgroundProcessor
      def initialize
        @queue = Queue.new
      end

      # Makes sure a worker thread is running, then enqueues the event.
      def <<(event)
        processor_thread
        @queue.push(event)
      end

      private

      # Guards worker creation; shared by every instance of this class.
      MUTEX = Mutex.new

      # Takes one event off the queue (blocking until one is available),
      # filters its backtrace and SQL through the configured procs, then hands
      # it to the logger that travelled with the event.
      def process_queue
        event = @queue.pop

        backtrace_filter = SqlAnalyzer.config[:backtrace_filter_proc]
        event[:caller] = backtrace_filter.call(event[:caller])

        sql_redactor = SqlAnalyzer.config[:sql_redactor_complex_proc]
        event[:sql] = sql_redactor.call(event[:sql].dup)

        sink = event.delete(:logger)
        sink.filter_event(event)
        sink.log(event)
      end

      # True when a worker thread exists and is still running.
      def worker_alive?
        @thread && @thread.alive?
      end

      # Starts the worker thread if none is currently alive.
      def processor_thread
        # Avoid grabbing a mutex unless we really need to
        return if worker_alive?

        MUTEX.synchronize do
          # Check again under the lock: another thread may have started the
          # worker while we were waiting for the mutex.
          @thread = Thread.new { run_worker } unless worker_alive?
        end
      end

      # Body of the worker thread: processes events until one raises, logs the
      # failure and lets the thread finish (the next `<<` starts a new one).
      def run_worker
        Rails.logger.info "[SQL-Analyzer] Starting background query thread id #{Thread.current.object_id} in pid #{Process.pid}"

        begin
          loop { process_queue }
        rescue => ex
          Rails.logger.warn "[SQL-Analyzer] Exception in thread #{Thread.current.object_id}: #{ex.class}, #{ex.message}"
          Rails.logger.warn "[SQL-Analyzer] #{ex.backtrace.join(", ")}"
        end
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require "pathname"
2
+
# Backtraces are reduced to non-library frames before being logged; without
# this, every event would carry an enormous number of library lines.
module ActiveRecord
  module SqlAnalyzer
    class BacktraceFilter
      # Path fragments that mark a backtrace line as library code: every gem
      # directory (as listed and with symlinks resolved), evaluated code, and
      # Ruby's own libdir. Memoized after the first call.
      def self.library_paths
        @library_paths ||= begin
          gem_dirs = Gem.path

          # Gem.path may list directories that were never created (e.g. the
          # per-user gem dir); File.realpath raises for those, so only resolve
          # the ones that exist.
          resolved = gem_dirs.select { |dir| File.exist?(dir) }.map { |dir| File.realpath(dir) }

          paths = (gem_dirs + resolved).uniq
          paths << "(eval):"
          paths << RbConfig::CONFIG.fetch('libdir')
          paths
        end
      end

      # Regex matching a line that starts with the Rails root path.
      def self.rails_root_regex
        @rails_root ||= %r{\A#{Regexp.escape(Rails.root.to_s)}}
      end

      # Returns a memoized Proc taking an array of backtrace lines and
      # returning a new array with library lines removed and lines under the
      # Rails root rewritten relative to it.
      def self.proc
        @proc ||= Proc.new do |lines|
          app_lines = lines.reject do |line|
            library_paths.any? { |path| line.include?(path) }
          end

          app_lines.map do |line|
            if line =~ rails_root_regex
              Pathname.new(line).relative_path_from(Rails.root).to_s
            else
              line
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,74 @@
module ActiveRecord
  module SqlAnalyzer
    # Command-line front end: parses options, locates the definition and usage
    # logs in a directory, feeds them to a CLIProcessor and dumps the result.
    class CLI
      attr_reader :options

      def initialize
        @options = {
          concurrency: 6
        }
      end

      # Lazily built processor using the configured concurrency.
      def processor
        @processor ||= ActiveRecord::SqlAnalyzer::CLIProcessor.new(options[:concurrency])
      end

      # Processes every "<prefix>_definitions.log*" file in the log directory,
      # then the matching "<prefix>.log*" usage files, and writes the merged
      # data into the destination directory.
      #
      # Raises ArgumentError when either directory option is missing or when
      # no definition logs are found.
      def run
        log_dir = options[:log_dir]
        dest_dir = options[:dest_dir]

        # Without these the globs and output paths below would silently
        # resolve against the filesystem root.
        raise ArgumentError, "A log directory (--log-dir) is required" unless log_dir
        raise ArgumentError, "A destination directory (--dest-dir) is required" unless dest_dir

        definition_logs = Dir["#{log_dir}/*_definitions.log*"].map do |path|
          [File.basename(path).gsub(/_definitions\.log.*/, ""), path]
        end

        if definition_logs.empty?
          raise ArgumentError, "Cannot find any log files in '#{log_dir}'"
        end

        # Process the definition logs
        processor.run_definition(definition_logs)

        # Process the usage logs
        prefixes = definition_logs.map(&:first).uniq.join(",")
        usage_logs = Dir["#{log_dir}/{#{prefixes}}.log*"].map do |path|
          [File.basename(path).split(".", 2).first, path]
        end

        processor.run_usage(usage_logs)

        processor.dump(dest_dir)
      end

      # Parses command-line arguments into #options and returns the arguments
      # that were not consumed as options.
      #
      # Raises ArgumentError for a non-existent directory or a concurrency
      # that is not positive; OptionParser raises its own errors for malformed
      # switches (e.g. a switch given without its value).
      def parse_options(args)
        opts = OptionParser.new

        # DIR is mandatory: a bare "--log-dir" must be rejected by the parser
        # rather than reaching Dir.exist? as nil.
        opts.on("--log-dir DIR", String, "Directory that logs are in") do |val|
          unless Dir.exist?(val)
            raise ArgumentError, "log directory '#{val}' does not exist"
          end

          options[:log_dir] = val
        end

        opts.on("--dest-dir DIR", String, "Directory to dump logs to") do |val|
          unless Dir.exist?(val)
            raise ArgumentError, "dest directory '#{val}' does not exist"
          end

          options[:dest_dir] = val
        end

        # The N placeholder tells OptionParser this switch takes a value.
        opts.on("-c", "--concurrency N", Integer, "How many threads to use for processing log files") do |val|
          if val <= 0
            raise ArgumentError, "Concurrency must be >0"
          end

          options[:concurrency] = val
        end

        opts.on_tail("-h", "--help") do
          puts opts
          exit
        end

        opts.parse(args)
      end
    end
  end
end
@@ -0,0 +1,122 @@
module ActiveRecord
  module SqlAnalyzer
    # Reads definition and usage log files on several threads and merges the
    # results into #definitions, keyed by log prefix and then by event SHA.
    class CLIProcessor
      attr_reader :concurrency, :definitions

      # concurrency - number of worker threads to use per run.
      def initialize(concurrency)
        @concurrency = concurrency
        @definitions = {}
      end

      # Drains +queue+ of [prefix, path] pairs, yielding
      # (data_for_prefix, stripped_line) for every line of every file.
      #
      # Returns the Hash of prefix => data built up by the block. Any error is
      # printed with its backtrace and re-raised.
      def self.process_queue(queue)
        local_data = {}

        loop do
          begin
            # Non-blocking pop: checking `empty?` and then doing a blocking
            # pop lets another worker take the last entry in between, leaving
            # this thread waiting forever.
            prefix, path = queue.pop(true)
          rescue ThreadError
            break
          end

          bucket = (local_data[prefix] ||= {})

          File.foreach(path) do |line|
            yield bucket, line.strip
          end
        end

        local_data

      rescue => ex
        puts "#{ex.class}: #{ex.message}"
        puts ex.backtrace
        raise
      end

      # Loads "sha|json" definition lines from +logs+ (an array of
      # [prefix, path] pairs) and merges them into #definitions.
      def run_definition(logs)
        # Each thread builds a local copy of the definitions, merged below
        results = process_in_threads(logs) do |local_definitions, line|
          next if line.empty?

          sha, event = line.split("|", 2)
          local_definitions[sha] = JSON.parse(event)
        end

        # Merge everything
        results.each do |result|
          result.each do |prefix, data|
            definitions[prefix] ||= {}
            definitions[prefix].merge!(data)
          end
        end
      end

      # Loads "timestamp|sha" usage lines from +logs+ (an array of
      # [prefix, path] pairs) and adds a "count" and the most recent
      # "last_called" time (UTC) to the matching definitions. Usage for a SHA
      # without a definition is reported on stdout and skipped.
      def run_usage(logs)
        # Each thread builds a local copy of the usage per SHA, merged below
        results = process_in_threads(logs) do |local_usage, line|
          next if line.empty?

          last_called, sha = line.split("|", 2)
          last_called = Time.at(last_called.to_i).utc

          usage = (local_usage[sha] ||= {"count" => 0})
          usage["count"] += 1

          if !usage["last_called"] || usage["last_called"] < last_called
            usage["last_called"] = last_called
          end
        end

        # Merge everything
        results.each do |result|
          result.each do |prefix, data|
            definitions[prefix] ||= {}

            data.each do |sha, usage|
              definition = definitions[prefix][sha]
              unless definition
                puts "Undefined event '#{sha}'"
                next
              end

              definition["count"] ||= 0
              definition["count"] += usage["count"]

              if !definition["last_called"] || definition["last_called"] < usage["last_called"]
                definition["last_called"] = usage["last_called"]
              end
            end
          end
        end
      end

      # Writes one "<prefix>_<YYYY-MM-DD>.log" JSON file per prefix into
      # +dest_dir+, announcing each file on stdout.
      def dump(dest_dir)
        definitions.each do |prefix, data|
          path = "#{dest_dir}/#{prefix}_#{Time.now.strftime("%Y-%m-%d")}.log"
          puts "Writing logs to '#{path}' (#{data.length} queries)"

          File.write(path, data.to_json)
        end
      end

      private

      # Queues every log, spins up `concurrency` threads that drain the queue
      # through process_queue with +line_handler+, and returns each thread's
      # result (re-raising any error a thread hit).
      def process_in_threads(logs, &line_handler)
        queue = Queue.new
        logs.each { |log| queue << log }

        workers = Array.new(concurrency) do
          Thread.new { CLIProcessor.process_queue(queue, &line_handler) }
        end

        workers.map(&:value)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
+ require "digest"
2
+
module ActiveRecord
  module SqlAnalyzer
    # Logger that writes each distinct event only once, to a separate
    # definitions file keyed by a digest of the event, and records every
    # occurrence as a short "timestamp|digest" line in the regular log file.
    #
    # NOTE(review): log_root, log_prefix and log_file are presumably provided
    # by the parent Logger class - confirm against logger.rb.
    class CompactLogger < Logger
      attr_reader :logged_shas, :definition_log_file

      def initialize(*)
        super

        # Digests already written to the definitions file by this instance.
        @logged_shas = Set.new

        definitions_path = "#{log_root}/#{log_prefix}_definitions.log"
        @definition_log_file = File.open(definitions_path, "a+")
      end

      # Records one occurrence of +event+, writing its full definition first
      # if this instance has not seen the event's digest before.
      def log(event)
        digest = Digest::MD5.hexdigest(event.to_s)

        if !logged_shas.include?(digest)
          definition_log_file.puts("#{digest}|#{event.to_json}")
          logged_shas.add(digest)
        end

        log_file.puts("#{Time.now.to_i}|#{digest}")
      end

      # Closes the definitions file, ignoring any error from doing so, then
      # defers to the parent's close.
      def close
        begin
          @definition_log_file.close
        rescue StandardError
          nil
        end

        super
      end
    end
  end
end
@@ -0,0 +1,174 @@
module ActiveRecord
  module SqlAnalyzer
    # Holds every configurable option for the SQL analyzer. Options are stored
    # in a Hash readable through #[]; the setter-style methods below validate
    # their argument before storing it.
    class Configuration
      attr_reader :options

      def initialize
        @options = {}
        setup_defaults
      end

      # Setup a custom proc that filters out lines before passing them to the loggers.
      # By default, this attempts to filter out all non-app code lines.
      def backtrace_filter_proc(proc)
        check_proc(proc, 1, "the backtrace lines")
        @options[:backtrace_filter_proc] = proc
      end

      # Setup a new analyzer for monitoring tables
      #
      #   add_analyzer(
      #     name: 'users',
      #     tables: %w(users permissions),
      #     logger: ActiveRecord::SqlAnalyzer::RedactedLogger
      #   )
      #
      # Will setup an analyzer that looks at the tables users and permissions
      # when it finds relevant data, it passes it through the `RedactedLogger` class.
      # When calling the proc passed to `log_sample_proc`, it will use the name
      # `users` to help identify it, as well as when logging to disk.
      #
      def add_analyzer(result)
        analyzer = Analyzer.new
        analyzer.name(result[:name])
        analyzer.tables(result[:tables])
        analyzer.logger(result[:logger])
        analyzer.setup

        @options[:analyzers] << analyzer
        analyzer
      end

      # Root path where all logs go.
      # Defaults to `Rails.root.join('log')`
      def logger_root_path(path)
        unless Dir.exist?(path)
          raise ArgumentError, "Path '#{path}' is not a directory"
        end

        @options[:logger_root_path] = path
      end

      # Set a proc that determines whether or not to log a single event.
      # This must be set to log anything, and controls how many SQL queries you look at.
      #
      #   Proc.new { |name| true }
      #
      # Will log everything no matter what
      #
      #   Proc.new do |name|
      #     rand(1..100) <= 50
      #   end
      #
      # Will only log 50% of queries.
      #
      # You can hook this into something like Redis to allow dynamic control of the ratio
      # without having to redeploy/restart your application.
      #
      def log_sample_proc(proc)
        check_proc(proc, 1, "the analyzer name")
        @options[:should_log_sample_proc] = proc
      end

      # For hooking in more complicated redactions beyond a simple find/replace.
      def complex_sql_redactor_proc(proc)
        check_proc(proc, 1, "the SQL statement")
        @options[:sql_redactor_complex_proc] = proc
      end

      # Additional redactors to filter out data in SQL you don't want logged
      def add_sql_redactors(list)
        @options[:sql_redactors].concat(create_redactors(list))
      end

      # Backtrace redactors filter out data in the backtrace
      # useful if you want to get rid of line numbers
      def add_backtrace_redactors(list)
        @options[:backtrace_redactors].concat(create_redactors(list))
      end

      # If the first line in the backtrace matches the regex given, we switch to
      # ambiguous tracing mode for that call where we log more of the backtrace.
      #
      # As an example, if you find you're only getting middleware, you could use:
      #
      #   %r{\Aapp/middleware/query_string_sanitizer\.rb:\d+:in `call'\z}
      #
      # Which would log up to ambiguous_backtrace_lines (default 3) total lines,
      # rather than the default 1.
      def add_ambiguous_tracers(list)
        list.each do |row|
          unless row.is_a?(Regexp)
            raise ArgumentError, "Tracing filters must be a Regexp to match on"
          end
        end

        @options[:ambiguous_tracers].concat(list)
      end

      # How many total lines to log when the caller is ambiguous.
      #
      # Raises ArgumentError unless +lines+ is an Integer greater than 1.
      def ambiguous_backtrace_lines(lines)
        # Integer rather than Fixnum: Fixnum was folded into Integer and the
        # constant no longer exists on current Rubies.
        if !lines.is_a?(Integer)
          raise ArgumentError, "Lines must be an Integer"
        elsif lines <= 1
          raise ArgumentError, "Lines cannot be <= 1"
        end

        @options[:ambiguous_backtrace_lines] = lines
      end

      # Read a single option by key.
      def [](key)
        @options[key]
      end

      private

      # Validates that +proc+ is a Proc taking exactly +arity+ arguments.
      # +msg+ describes what the argument is, for the error message.
      def check_proc(proc, arity, msg)
        if !proc.is_a?(Proc)
          raise ArgumentError, "You must pass a proc"
        elsif proc.arity != arity
          raise ArgumentError, "Proc must accept #{arity} argument#{'s' unless arity == 1} for #{msg}"
        end
      end

      # Turns a list of [Regexp, String] pairs into Redactor instances,
      # raising ArgumentError for any malformed pair.
      def create_redactors(list)
        list.map do |redact|
          if redact.length != 2
            raise ArgumentError, "Redactor row should only have two entries"
          elsif !redact.first.is_a?(Regexp)
            raise ArgumentError, "First value in pair must be a Regexp to match on"
          elsif !redact.last.is_a?(String)
            raise ArgumentError, "Last value in pair must be a String to replace with"
          end

          Redactor.new(*redact)
        end
      end

      # Populates @options with the built-in SQL redactors and the default
      # value of every other option. Nothing is logged until a sampling proc
      # is supplied, because the default one always returns false.
      def setup_defaults
        @options[:sql_redactors] = [
          Redactor.new(/\n/, " "),
          Redactor.new(/\s+/, " "),
          Redactor.new(/(\s|\b|`)(=|!=|>=|>|<=|<) ?(BINARY )?-?\d+(\.\d+)?/, " = [REDACTED]"),
          Redactor.new(/(\s|\b|`)(=|!=|>=|>|<=|<) ?(BINARY )?x?'[^']*'/, " = '[REDACTED]'"),
          Redactor.new(/VALUES \(.+\)$/, "VALUES ([REDACTED])"),
          Redactor.new(/IN \([^)]+\)/, "IN ([REDACTED])"),
          Redactor.new(/BETWEEN '[^']*' AND '[^']*'/, "BETWEEN '[REDACTED]' AND '[REDACTED]'"),
          Redactor.new(/LIKE '[^'\\]*(?:\\.[^'\\]*)*'/, "LIKE '[REDACTED]'"),
          Redactor.new(/ LIMIT \d+/, ""),
          Redactor.new(/ OFFSET \d+/, ""),
          Redactor.new(/INSERT INTO (`?\w+`?) \([^)]+\)/, 'INSERT INTO \1 ([COLUMNS])'),
        ]

        @options[:should_log_sample_proc] = Proc.new { |_name| false }
        @options[:sql_redactor_complex_proc] = Proc.new { |sql| sql }
        @options[:backtrace_redactors] = []
        @options[:ambiguous_tracers] = []
        @options[:ambiguous_backtrace_lines] = 3
        @options[:analyzers] = []
        @options[:logger_root_path] = Rails.root.join('log')
        @options[:backtrace_filter_proc] = BacktraceFilter.proc
      end
    end
  end
end