active_record-sql_analyzer 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +15 -0
  5. data/CONTRIBUTING.md +9 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.md +202 -0
  8. data/README.md +119 -0
  9. data/Rakefile +6 -0
  10. data/active_record-sql_analyzer.gemspec +21 -0
  11. data/bin/ar-log-analyzer +10 -0
  12. data/lib/active_record/sql_analyzer.rb +40 -0
  13. data/lib/active_record/sql_analyzer/analyzer.rb +45 -0
  14. data/lib/active_record/sql_analyzer/background_processor.rb +54 -0
  15. data/lib/active_record/sql_analyzer/backtrace_filter.rb +38 -0
  16. data/lib/active_record/sql_analyzer/cli.rb +74 -0
  17. data/lib/active_record/sql_analyzer/cli_processor.rb +122 -0
  18. data/lib/active_record/sql_analyzer/compact_logger.rb +31 -0
  19. data/lib/active_record/sql_analyzer/configuration.rb +174 -0
  20. data/lib/active_record/sql_analyzer/logger.rb +29 -0
  21. data/lib/active_record/sql_analyzer/monkeypatches/query.rb +35 -0
  22. data/lib/active_record/sql_analyzer/monkeypatches/tagger.rb +24 -0
  23. data/lib/active_record/sql_analyzer/redacted_logger.rb +22 -0
  24. data/lib/active_record/sql_analyzer/redactor.rb +5 -0
  25. data/lib/active_record/sql_analyzer/version.rb +5 -0
  26. data/spec/active_record/sql_analyzer/analyzer_spec.rb +33 -0
  27. data/spec/active_record/sql_analyzer/background_processor_spec.rb +44 -0
  28. data/spec/active_record/sql_analyzer/backtrace_filter_spec.rb +28 -0
  29. data/spec/active_record/sql_analyzer/cli_processor_spec.rb +120 -0
  30. data/spec/active_record/sql_analyzer/cli_spec.rb +66 -0
  31. data/spec/active_record/sql_analyzer/end_to_end_spec.rb +121 -0
  32. data/spec/active_record/sql_analyzer/redacted_logger_spec.rb +67 -0
  33. data/spec/spec_helper.rb +34 -0
  34. data/spec/support/db_connection.rb +65 -0
  35. data/spec/support/stub_logger.rb +9 -0
  36. data/spec/support/stub_rails.rb +9 -0
  37. data/spec/support/wait_for_pop.rb +13 -0
  38. metadata +129 -0
@@ -0,0 +1,29 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Appends analyzer events to "<log_root>/<log_prefix>.log", one JSON
    # document per line.
    class Logger
      attr_reader :log_file, :log_prefix, :log_root, :config

      # @param log_root [String] directory the log file lives in
      # @param log_prefix [String, Symbol] log file name without the ".log" extension
      def initialize(log_root, log_prefix)
        @log_prefix = log_prefix
        @log_root = log_root
        @config = SqlAnalyzer.config

        # Append mode: whatever is already in the file is kept.
        @log_file = File.open(File.join(log_root.to_s, "#{log_prefix}.log"), "a")
      end

      # Log the raw event data directly to disk
      #
      # @param event [#to_json] event to serialize as a single line
      def log(event)
        log_file.puts(event.to_json)
      end

      # Further redact or remove any other information from an event.
      # This implementation hands the event back untouched.
      #
      # @param event [Object] the event about to be logged
      # @return [Object] the same event
      def filter_event(event)
        event
      end

      # Closes the log file on a best-effort basis. Calling it again after the
      # file is closed is a no-op.
      #
      # Only I/O failures are swallowed; other errors (for example a
      # NoMethodError caused by a bug) are allowed to surface instead of being
      # hidden by a blanket `rescue nil`.
      #
      # @return [nil]
      def close
        log_file.close unless log_file.closed?
        nil
      rescue IOError, SystemCallError
        nil
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+
2
module ActiveRecord
  module SqlAnalyzer
    module Monkeypatches
      # Wraps #execute so that statements touching an analyzed table are
      # handed to SqlAnalyzer.background_processor before the real query runs.
      module Query
        # @param sql [String] the statement about to be executed
        # @param args [Array] extra arguments, forwarded untouched to super
        # @return whatever the wrapped #execute returns
        def execute(sql, *args)
          # Read the configuration once: the nil guard and every later lookup
          # then see the same object even if it is swapped out mid-call.
          config = SqlAnalyzer.config
          return super unless config

          safe_sql = nil

          config[:analyzers].each do |analyzer|
            next unless config[:should_log_sample_proc].call(analyzer[:name])

            # This is here rather than above intentionally.
            # We assume we're not going to be analyzing 100% of queries and want to only re-encode
            # when it's actually relevant.
            safe_sql ||= sql.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
            next unless safe_sql =~ analyzer[:table_regex]

            SqlAnalyzer.background_processor << {
              sql: safe_sql,
              caller: caller,
              logger: analyzer[:logger_instance],
              tag: Thread.current[:_ar_analyzer_tag],
              request_path: Thread.current[:_ar_analyzer_request_path]
            }
          end

          super
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module ActiveRecord
  module SqlAnalyzer
    module Monkeypatches
      # Lets an object that responds to #exec_queries carry a tag, which is
      # exposed through Thread.current[:_ar_analyzer_tag] while its queries run.
      module Tagger
        def initialize(*)
          super
          @_ar_analyzer_tag = nil
        end

        # Remembers +name+ as this object's tag.
        #
        # @param name [Object] the tag to expose while queries execute
        # @return [self] so the call can be chained
        def with_tag(name)
          @_ar_analyzer_tag = name
          self
        end

        # Runs the wrapped #exec_queries with the tag published in the current
        # thread. A tag that is already set (for example by an enclosing
        # tagged load) takes precedence over this object's own tag.
        #
        # Afterwards the thread-local is restored to what it was on entry, so
        # a nested tagged load no longer wipes the tag of the load around it.
        def exec_queries
          previous_tag = Thread.current[:_ar_analyzer_tag]
          Thread.current[:_ar_analyzer_tag] ||= @_ar_analyzer_tag
          super
        ensure
          Thread.current[:_ar_analyzer_tag] = previous_tag
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Logger that trims the backtrace down to a string and runs the configured
    # redactors over both the backtrace and the SQL before logging.
    class RedactedLogger < CompactLogger
      # Rewrites event[:caller] and event[:sql] in the event hash.
      #
      # The strings themselves are never modified in place (gsub, not gsub!),
      # so a string object that is referenced from elsewhere, or frozen, is
      # left alone.
      #
      # NOTE(review): `config` is presumably the reader inherited through
      # CompactLogger; confirm against that class.
      #
      # @param event [Hash] expects :caller (backtrace lines) and :sql (String)
      # @return [Hash] the same event hash, matching Logger#filter_event
      def filter_event(event)
        # Array() tolerates a missing backtrace instead of failing on nil.
        frames = Array(event[:caller])

        # Determine if we're doing extended tracing or only the first
        extended = config[:ambiguous_tracers].any? { |regex| frames.first =~ regex }
        trace =
          if extended
            frames.first(config[:ambiguous_backtrace_lines]).join(", ")
          else
            frames.first.to_s
          end

        event[:caller] = config[:backtrace_redactors].reduce(trace) do |text, redactor|
          text.gsub(redactor.search, redactor.replace)
        end

        event[:sql] = config[:sql_redactors].reduce(event[:sql]) do |text, redactor|
          text.gsub(redactor.search, redactor.replace)
        end

        event
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module ActiveRecord
  module SqlAnalyzer
    # A substitution rule: +search+ is the pattern to look for and +replace+
    # is what each match is swapped for.
    Redactor = Struct.new(:search, :replace)
  end
end
@@ -0,0 +1,5 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Gem version. Frozen so the constant cannot be mutated in place.
    VERSION = '0.0.5'.freeze
  end
end
@@ -0,0 +1,33 @@
1
RSpec.describe ActiveRecord::SqlAnalyzer::Analyzer do
  # An analyzer configured for the "foo" and "bar" tables.
  let(:analyzer) do
    described_class.new.tap { |instance| instance.tables %w(foo bar) }
  end

  context "table regex" do
    let(:regex) { analyzer[:table_regex] }

    it "matches" do
      [
        "SELECT * FROM foo",
        "DELETE FROM foo",
        "INSERT INTO bar (a, b, c) VALUES (1, 2, 3)",
        "UPDATE bar SET a=b WHERE id=1"
      ].each do |statement|
        expect(statement).to match(regex)
      end
    end

    it "matches with complex queries" do
      [
        "SELECT * FROM apple JOIN foo",
        "SELECT * FROM apple LEFT JOIN foo",
        "SELECT * FROM apple WHERE id = (SELECT * FROM foo)"
      ].each do |statement|
        expect(statement).to match(regex)
      end
    end

    it "does not match" do
      # Table names that only show up as values or column names, followed by
      # statements against a table that is not analyzed at all.
      [
        "SELECT * FROM apple WHERE id='foo'",
        "SELECT foo FROM apple WHERE id='bar'",
        "DELETE FROM apple",
        "INSERT INTO apple (a, b, c) VALUES (1, 2, 3)",
        "UPDATE apple SET a=b WHERE id=1"
      ].each do |statement|
        expect(statement).to_not match(regex)
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
RSpec.describe ActiveRecord::SqlAnalyzer::BackgroundProcessor do
  include WaitForPop

  let(:instance) { described_class.new }

  let(:event) { {caller: "CALLER", sql: "SQL", logger: logger} }

  # Stand-in logger: records every event it is asked to log and then stalls
  # for two seconds.
  let(:logger) do
    Class.new do
      class << self
        def events
          @events ||= []
        end

        def filter_event(*)
        end

        def log(event)
          events << event
          sleep 2
        end
      end
    end
  end

  before do
    ActiveRecord::SqlAnalyzer.configure do |c|
      c.backtrace_filter_proc proc { |lines| "BFP #{lines}" }
      c.complex_sql_redactor_proc proc { |sql| "CSRP #{sql}" }
    end
  end

  it "processes in the background" do
    instance << event
    wait_for_pop

    expected_event = {caller: "BFP CALLER", sql: "CSRP SQL"}
    expect(logger.events).to eq([expected_event])
  end
end
@@ -0,0 +1,28 @@
1
RSpec.describe ActiveRecord::SqlAnalyzer::BacktraceFilter do
  before do
    ActiveRecord::SqlAnalyzer.configure { |_c| }
  end

  it "filters non-app paths" do
    filter = ActiveRecord::SqlAnalyzer.config[:backtrace_filter_proc]

    # Application frames interleaved with frames from the gem directory (both
    # as reported and with symlinks resolved) and from an eval.
    backtrace = [
      "foo/bar:1 in 'method'",
      "#{Gem.path.first}:4231 in 'method'",
      "foo/bar:2 in 'method'",
      "#{File.realpath(Gem.path.first)}:9531 in 'method'",
      "foo/bar:3 in 'method'",
      "(eval):1234 in 'method'",
      "foo/bar:4 in 'method'"
    ]

    app_frames = (1..4).map { |n| "foo/bar:#{n} in 'method'" }

    expect(filter.call(backtrace)).to eq(app_frames)
  end
end
@@ -0,0 +1,120 @@
1
+ require_relative "../../../lib/active_record/sql_analyzer/cli_processor"
2
+
3
RSpec.describe ActiveRecord::SqlAnalyzer::CLIProcessor do
  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  # Writes +events+ through CompactLogger, two events per log file
  # ("<prefix>_<index>"), with the clock frozen one hour later for every
  # event, starting at 2015-01-01 01:00 UTC.
  def write_logs(prefix, *events)
    clock = Time.utc(2015, 1, 1, 0)

    events.each_slice(2).each_with_index do |batch, index|
      logger = ActiveRecord::SqlAnalyzer::CompactLogger.new(tmp_dir, "#{prefix}_#{index}")

      batch.each do |event|
        clock += 3600
        Timecop.freeze(clock) { logger.log(event) }
      end

      logger.close
    end
  end

  # Pairs every path with the part of its basename before the first "_".
  def with_prefix(paths)
    paths.map { |path| [File.basename(path).split("_", 2).first, path] }
  end

  # Compares the aggregated rows (ordered by SQL) with +expected+, a list of
  # [sql, caller, count, hour of last call on 2015-01-01 UTC, tag].
  def expect_aggregated(definitions, expected)
    expect(definitions.length).to eq(expected.length)

    rows = definitions.values.sort_by { |row| row["sql"] }

    expected.each_with_index do |(sql, caller_line, count, hour, tag), position|
      row = rows[position]

      expect(row["sql"]).to eq(sql)
      expect(row["caller"]).to eq(caller_line)
      expect(row["count"]).to eq(count)
      expect(row["last_called"]).to eq(Time.utc(2015, 1, 1, hour))
      expect(row["tag"]).to eq(tag)
    end
  end

  let(:instance) { described_class.new(2) }

  before do
    write_logs(
      :foo,
      {sql: "F-SQL1", caller: "CALLER1", tag: true},
      {sql: "F-SQL2", caller: "CALLER2", tag: true},
      {sql: "F-SQL1", caller: "CALLER1", tag: true},
      {sql: "F-SQL3", caller: "CALLER3", tag: true},
      {sql: "F-SQL2", caller: "CALLER2", tag: true}
    )

    write_logs(
      :bar,
      {sql: "B-SQL1", caller: "CALLER1"},
      {sql: "B-SQL2", caller: "CALLER2"},
      {sql: "B-SQL3", caller: "CALLER3"},
      {sql: "B-SQL2", caller: "CALLER2"},
      {sql: "B-SQL1", caller: "CALLER1"}
    )
  end

  subject(:process) do
    definition_paths = Dir["#{tmp_dir}/*_*_definitions.log"]
    usage_paths = Dir["#{tmp_dir}/*_*.log"].reject { |path| path =~ /definitions\.log$/ }

    instance.run_definition(with_prefix(definition_paths))
    instance.run_usage(with_prefix(usage_paths))

    instance.definitions
  end

  it "processes logs" do
    logs = process

    expect_aggregated(
      logs["foo"],
      [
        ["F-SQL1", "CALLER1", 2, 3, true],
        ["F-SQL2", "CALLER2", 2, 5, true],
        ["F-SQL3", "CALLER3", 1, 4, true]
      ]
    )

    expect_aggregated(
      logs["bar"],
      [
        ["B-SQL1", "CALLER1", 2, 5, nil],
        ["B-SQL2", "CALLER2", 2, 4, nil],
        ["B-SQL3", "CALLER3", 1, 3, nil]
      ]
    )
  end

  it "dumps to disk" do
    process
    instance.dump(tmp_dir)

    today = Time.now.strftime("%Y-%m-%d")
    paths = Dir["#{tmp_dir}/{foo,bar}_#{today}.log"]
    expect(paths.length).to eq(2)
  end
end
@@ -0,0 +1,66 @@
1
+ require "fileutils"
2
+ require_relative "../../../lib/active_record/sql_analyzer/cli"
3
+ require_relative "../../../lib/active_record/sql_analyzer/cli_processor"
4
+
5
RSpec.describe ActiveRecord::SqlAnalyzer::CLI do
  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  let(:instance) do
    described_class.new.tap do |cli|
      cli.parse_options(["--log-dir", tmp_dir, "--dest-dir", tmp_dir])
    end
  end

  # Three rotated usage logs and three rotated definition logs per prefix.
  before do
    %w(foo bar).each do |prefix|
      3.times do |i|
        FileUtils.touch("#{tmp_dir}/#{prefix}.log.#{i}")
        FileUtils.touch("#{tmp_dir}/#{prefix}_definitions.log.#{i}")
      end
    end
  end

  # Asserts that +paths+ (a list of [prefix, path] pairs) consists of two
  # consecutive groups of three entries, each group sharing one prefix and
  # covering rotations 0 through 2 of "<prefix><suffix>.log".
  #
  # Groups are taken with each_slice(3). Array#[] takes (start, length), so
  # the earlier `paths[3, 6]` asked for six elements and only produced the
  # intended three because the array ended there.
  def expect_grouped_logs(paths, suffix)
    expect(paths.length).to eq(6)

    paths.each_slice(3) do |group|
      prefixes = group.map(&:first).uniq
      logs = group.map(&:last)

      expect(prefixes.length).to eq(1)

      3.times do |rotation|
        expect(logs).to include(/#{prefixes.first}#{suffix}\.log\.#{rotation}/)
      end
    end
  end

  it "parses logs and starts the processor" do
    expect(instance.processor).to receive(:run_definition) do |paths|
      expect_grouped_logs(paths, "_definitions")
    end

    expect(instance.processor).to receive(:run_usage) do |paths|
      expect_grouped_logs(paths, "")
    end

    expect(instance.processor).to receive(:dump).with(tmp_dir)

    instance.run
  end
end
@@ -0,0 +1,121 @@
1
RSpec.describe "End to End" do
  include WaitForPop

  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  let(:log_path) { "#{tmp_dir}/test_tag.log" }
  let(:log_data) { File.read(log_path) }

  # Number of usage-log lines per SHA (the part of a line after the first "|").
  let(:log_hash) do
    log_data.split("\n").each_with_object({}) do |line, counts|
      sha = line.split("|", 2).last
      counts[sha] = counts.fetch(sha, 0) + 1
    end
  end

  # Occurrence count => SHA.
  let(:log_reverse_hash) { log_hash.invert }

  let(:log_def_path) { "#{tmp_dir}/test_tag_definitions.log" }
  let(:log_def_data) { File.read(log_def_path) }

  # SHA => parsed JSON definition, from "sha|json" lines.
  let(:log_def_hash) do
    log_def_data.split("\n").each_with_object({}) do |line, definitions|
      sha, event = line.split("|", 2)
      definitions[sha] = JSON.parse(event)
    end
  end

  before do
    ActiveRecord::SqlAnalyzer.configure do |c|
      c.logger_root_path tmp_dir
      c.log_sample_proc proc { |_name| true }

      c.add_analyzer(name: :test_tag, tables: %w(matching_table))
    end

    # Unbuffered writes so the examples can read the files right away.
    ActiveRecord::SqlAnalyzer.config[:analyzers].each do |analyzer|
      logger = analyzer[:logger_instance]
      logger.definition_log_file.sync = true
      logger.log_file.sync = true
    end
  end

  # Runs +sql+ on the test connection, then waits for the background
  # processor via wait_for_pop.
  def execute(sql)
    DBConnection.connection.execute(sql)
    wait_for_pop
  end

  it "does not log with a non-matching table" do
    execute "SELECT * FROM nonmatching_table"

    expect(log_data).to eq("")
    expect(log_def_data).to eq("")
  end

  it "logs with a matching + non-matching table in one query" do
    [
      "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 1234",
      "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 4321",
      "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.test_string = 'abc'"
    ].each { |sql| execute sql }

    expect(log_hash.length).to eq(2)

    # The two id lookups share one definition; the string lookup has its own.
    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    expect(log_def_hash[id_sha]["sql"]).to include("mt.id = [REDACTED]")
    expect(log_def_hash[str_sha]["sql"]).to include("mt.test_string = '[REDACTED]'")
  end

  it "logs with only a matching table in a query" do
    [
      "SELECT * FROM matching_table WHERE id = 1234",
      "SELECT * FROM matching_table WHERE id = 4321",
      "SELECT * FROM matching_table WHERE test_string = 'abc'"
    ].each { |sql| execute sql }

    expect(log_hash.length).to eq(2)

    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    expect(log_def_hash[id_sha]["sql"]).to include("id = [REDACTED]")
    expect(log_def_hash[str_sha]["sql"]).to include("test_string = '[REDACTED]'")
  end

  it "handles invalid UTF-8" do
    execute "SELECT * FROM matching_table WHERE test_string = '\xe5'"
    execute "SELECT * FROM matching_table WHERE test_string = 'foobar'"

    expect(log_reverse_hash.first.first).to eq(2)

    sha = log_reverse_hash[2]
    expect(log_def_hash[sha]["sql"]).to include("test_string = '[REDACTED]'")
  end

  context "when sampling is disabled" do
    before do
      ActiveRecord::SqlAnalyzer.configure do |c|
        c.log_sample_proc proc { |_name| false }
      end
    end

    it "does not log" do
      execute "SELECT * FROM matching_table"

      expect(log_data).to eq("")
      expect(log_def_data).to eq("")
    end
  end
end