active_record-sql_analyzer 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +15 -0
  5. data/CONTRIBUTING.md +9 -0
  6. data/Gemfile +12 -0
  7. data/LICENSE.md +202 -0
  8. data/README.md +119 -0
  9. data/Rakefile +6 -0
  10. data/active_record-sql_analyzer.gemspec +21 -0
  11. data/bin/ar-log-analyzer +10 -0
  12. data/lib/active_record/sql_analyzer.rb +40 -0
  13. data/lib/active_record/sql_analyzer/analyzer.rb +45 -0
  14. data/lib/active_record/sql_analyzer/background_processor.rb +54 -0
  15. data/lib/active_record/sql_analyzer/backtrace_filter.rb +38 -0
  16. data/lib/active_record/sql_analyzer/cli.rb +74 -0
  17. data/lib/active_record/sql_analyzer/cli_processor.rb +122 -0
  18. data/lib/active_record/sql_analyzer/compact_logger.rb +31 -0
  19. data/lib/active_record/sql_analyzer/configuration.rb +174 -0
  20. data/lib/active_record/sql_analyzer/logger.rb +29 -0
  21. data/lib/active_record/sql_analyzer/monkeypatches/query.rb +35 -0
  22. data/lib/active_record/sql_analyzer/monkeypatches/tagger.rb +24 -0
  23. data/lib/active_record/sql_analyzer/redacted_logger.rb +22 -0
  24. data/lib/active_record/sql_analyzer/redactor.rb +5 -0
  25. data/lib/active_record/sql_analyzer/version.rb +5 -0
  26. data/spec/active_record/sql_analyzer/analyzer_spec.rb +33 -0
  27. data/spec/active_record/sql_analyzer/background_processor_spec.rb +44 -0
  28. data/spec/active_record/sql_analyzer/backtrace_filter_spec.rb +28 -0
  29. data/spec/active_record/sql_analyzer/cli_processor_spec.rb +120 -0
  30. data/spec/active_record/sql_analyzer/cli_spec.rb +66 -0
  31. data/spec/active_record/sql_analyzer/end_to_end_spec.rb +121 -0
  32. data/spec/active_record/sql_analyzer/redacted_logger_spec.rb +67 -0
  33. data/spec/spec_helper.rb +34 -0
  34. data/spec/support/db_connection.rb +65 -0
  35. data/spec/support/stub_logger.rb +9 -0
  36. data/spec/support/stub_rails.rb +9 -0
  37. data/spec/support/wait_for_pop.rb +13 -0
  38. metadata +129 -0
@@ -0,0 +1,29 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Base event logger. Appends each event, serialized with +to_json+, to
    # "<log_root>/<log_prefix>.log".
    class Logger
      attr_reader :log_file, :log_prefix, :log_root, :config

      # @param log_root [String] directory the log file is created in
      # @param log_prefix [String] file name of the log, without the ".log" extension
      def initialize(log_root, log_prefix)
        @log_root = log_root
        @log_prefix = log_prefix
        # Snapshot of whatever SqlAnalyzer.config returns at construction time.
        @config = SqlAnalyzer.config

        path = "#{log_root}/#{log_prefix}.log"
        # Append mode: existing log content is preserved.
        @log_file = File.open(path, "a")
      end

      # Writes the raw event to the log file as a single +puts+ of its JSON form.
      def log(event)
        serialized = event.to_json
        log_file.puts(serialized)
      end

      # Hook for redacting or stripping data from an event before it is logged.
      # This base implementation hands the event back untouched.
      def filter_event(event)
        event
      end

      # Closes the log file. Best effort: any StandardError raised while
      # closing is swallowed and nil is returned.
      def close
        @log_file.close
      rescue StandardError
        nil
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
+
2
module ActiveRecord
  module SqlAnalyzer
    module Monkeypatches
      # Wraps +execute+ so that sampled queries touching an analyzer's tables
      # are handed to the background processor before the real query runs.
      # The module calls +super+, so it must sit in front of a class that
      # already defines +execute+.
      module Query
        # Queues one event per matching analyzer, then delegates to +super+
        # with the original, unmodified arguments.
        #
        # @param sql [String] the statement about to be executed
        # @param args remaining arguments, forwarded untouched to +super+
        # @return whatever the wrapped +execute+ returns
        def execute(sql, *args)
          return super unless SqlAnalyzer.config

          safe_sql = nil

          SqlAnalyzer.config[:analyzers].each do |analyzer|
            next unless SqlAnalyzer.config[:should_log_sample_proc].call(analyzer[:name])

            # Re-encoding is deliberately deferred until an analyzer actually
            # samples this query: most queries are expected to be skipped, so
            # the copy is only paid for when it is relevant.
            safe_sql ||= sql.encode(Encoding::UTF_8, invalid: :replace, undef: :replace)

            next unless safe_sql =~ analyzer[:table_regex]

            SqlAnalyzer.background_processor << {
              # Each event gets its own copy: loggers redact event[:sql] in
              # place (gsub!), so sharing one String would let one analyzer's
              # redaction leak into another event and into the regex match
              # above while this loop is still running.
              sql: safe_sql.dup,
              # Captured inside the block on purpose so the recorded frames
              # stay the same as before.
              caller: caller,
              logger: analyzer[:logger_instance],
              tag: Thread.current[:_ar_analyzer_tag],
              request_path: Thread.current[:_ar_analyzer_request_path]
            }
          end

          super
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module ActiveRecord
  module SqlAnalyzer
    module Monkeypatches
      # Lets an object that responds to +exec_queries+ carry an analyzer tag
      # and exposes it through Thread.current[:_ar_analyzer_tag] while its
      # queries run.
      # NOTE(review): presumably mixed into ActiveRecord::Relation; the
      # hook-up is not visible here.
      module Tagger
        # Forwards all arguments to +super+ and starts out untagged.
        def initialize(*)
          super
          @_ar_analyzer_tag = nil
        end

        # Sets the tag for this object.
        #
        # @param name the tag to expose while this object's queries execute
        # @return [self] so the call can be chained
        def with_tag(name)
          @_ar_analyzer_tag = name
          self
        end

        # Runs +super+ with the tag published in Thread.current. A tag that is
        # already set (for example by an enclosing tagged object) wins over
        # this object's own tag.
        def exec_queries
          previous_tag = Thread.current[:_ar_analyzer_tag]
          Thread.current[:_ar_analyzer_tag] ||= @_ar_analyzer_tag
          super
        ensure
          # Restore rather than clear: clearing unconditionally would wipe the
          # tag of an enclosing tagged call that is still executing.
          Thread.current[:_ar_analyzer_tag] = previous_tag
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Logger that trims the backtrace down to a single string and applies the
    # configured redactors to the caller and SQL before the event is logged.
    class RedactedLogger < CompactLogger
      # Rewrites +event+ in place.
      #
      # event[:caller] is expected to be an Array of backtrace lines on entry
      # and is replaced by a String (or nil when the backtrace is empty).
      # event[:sql] is redacted in place.
      #
      # @param event [Hash] event with at least :caller and :sql
      # @return [Hash] the same event, matching Logger#filter_event
      def filter_event(event)
        backtrace = event[:caller]
        first_line = backtrace.first

        # When the first frame matches an "ambiguous tracer", keep several
        # lines of context; otherwise the first frame alone is enough.
        if config[:ambiguous_tracers].any? { |regex| first_line =~ regex }
          event[:caller] = backtrace[0, config[:ambiguous_backtrace_lines]].join(", ")
        else
          event[:caller] = first_line
        end

        # An empty backtrace leaves the caller nil; there is nothing to redact
        # then, and calling gsub! on nil would raise.
        if event[:caller]
          config[:backtrace_redactors].each do |redactor|
            event[:caller].gsub!(redactor.search, redactor.replace)
          end
        end

        config[:sql_redactors].each do |redactor|
          event[:sql].gsub!(redactor.search, redactor.replace)
        end

        event
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
module ActiveRecord
  module SqlAnalyzer
    # A search/replace pair. Both members are passed straight to
    # String#gsub! by the redacting logger: +search+ is the pattern and
    # +replace+ the replacement.
    Redactor = Struct.new(
      :search,
      :replace
    )
  end
end
@@ -0,0 +1,5 @@
1
module ActiveRecord
  module SqlAnalyzer
    # Version string of this gem release.
    VERSION = "0.0.5"
  end
end
@@ -0,0 +1,33 @@
1
# Checks the table-matching regex an Analyzer builds for the tables "foo" and "bar".
RSpec.describe ActiveRecord::SqlAnalyzer::Analyzer do
  let(:analyzer) do
    instance = described_class.new
    instance.tables %w(foo bar)
    instance
  end

  context "table regex" do
    let(:regex) { analyzer[:table_regex] }

    it "matches" do
      # Basic statements that reference a watched table directly.
      [
        "SELECT * FROM foo",
        "DELETE FROM foo",
        "INSERT INTO bar (a, b, c) VALUES (1, 2, 3)",
        "UPDATE bar SET a=b WHERE id=1"
      ].each do |sql|
        expect(sql).to match(regex)
      end
    end

    it "matches with complex queries" do
      # The watched table appears in a join or a sub-select.
      [
        "SELECT * FROM apple JOIN foo",
        "SELECT * FROM apple LEFT JOIN foo",
        "SELECT * FROM apple WHERE id = (SELECT * FROM foo)"
      ].each do |sql|
        expect(sql).to match(regex)
      end
    end

    it "does not match" do
      # "foo"/"bar" appear only as a value or a column, or not at all.
      [
        "SELECT * FROM apple WHERE id='foo'",
        "SELECT foo FROM apple WHERE id='bar'",
        "DELETE FROM apple",
        "INSERT INTO apple (a, b, c) VALUES (1, 2, 3)",
        "UPDATE apple SET a=b WHERE id=1"
      ].each do |sql|
        expect(sql).to_not match(regex)
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
# Pushes one event through the BackgroundProcessor and checks what reaches the logger.
RSpec.describe ActiveRecord::SqlAnalyzer::BackgroundProcessor do
  include WaitForPop

  let(:instance) { described_class.new }

  let(:event) { {caller: "CALLER", sql: "SQL", logger: logger} }

  # Stand-in logger: an anonymous class whose class-level methods collect
  # every logged event.
  let(:logger) do
    Class.new do
      class << self
        def events
          @events ||= []
        end

        def filter_event(*)
        end

        def log(event)
          events << event
          # NOTE(review): presumably simulates a slow logger - confirm.
          sleep 2
        end
      end
    end
  end

  before do
    ActiveRecord::SqlAnalyzer.configure do |c|
      # Both procs prefix their input so the expectation can tell they ran.
      c.backtrace_filter_proc proc { |lines| "BFP #{lines}" }
      c.complex_sql_redactor_proc proc { |sql| "CSRP #{sql}" }
    end
  end

  it "processes in the background" do
    instance << event
    wait_for_pop

    processed_event = {
      caller: "BFP CALLER",
      sql: "CSRP SQL"
    }

    expect(logger.events).to eq([processed_event])
  end
end
@@ -0,0 +1,28 @@
1
# Exercises the default backtrace filter proc installed by an empty configure block.
RSpec.describe ActiveRecord::SqlAnalyzer::BacktraceFilter do
  before do
    ActiveRecord::SqlAnalyzer.configure { |_c| }
  end

  # Application frames interleaved with frames under the first gem path
  # (both as configured and resolved through realpath) and an (eval) frame.
  let(:backtrace) do
    [
      "foo/bar:1 in 'method'",
      "#{Gem.path.first}:4231 in 'method'",
      "foo/bar:2 in 'method'",
      "#{File.realpath(Gem.path.first)}:9531 in 'method'",
      "foo/bar:3 in 'method'",
      "(eval):1234 in 'method'",
      "foo/bar:4 in 'method'"
    ]
  end

  let(:app_lines) do
    [
      "foo/bar:1 in 'method'",
      "foo/bar:2 in 'method'",
      "foo/bar:3 in 'method'",
      "foo/bar:4 in 'method'"
    ]
  end

  it "filters non-app paths" do
    filter = ActiveRecord::SqlAnalyzer.config[:backtrace_filter_proc]
    lines = filter.call(backtrace)

    expect(lines).to eq(app_lines)
  end
end
@@ -0,0 +1,120 @@
1
+ require_relative "../../../lib/active_record/sql_analyzer/cli_processor"
2
+
3
# Writes sample logs with CompactLogger and checks how CLIProcessor aggregates them.
RSpec.describe ActiveRecord::SqlAnalyzer::CLIProcessor do
  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  # Logs +events+ through one CompactLogger per pair of events, using the log
  # prefix "<prefix>_<index>". Time is frozen one hour later for every event,
  # starting at 2015-01-01 01:00 UTC.
  def write_logs(prefix, *events)
    clock = Time.utc(2015, 1, 1, 0)

    events.each_slice(2).with_index do |pair, file_number|
      logger = ActiveRecord::SqlAnalyzer::CompactLogger.new(tmp_dir, "#{prefix}_#{file_number}")

      pair.each do |event|
        clock += 3600
        Timecop.freeze(clock) { logger.log(event) }
      end

      logger.close
    end
  end

  # Compares every listed field of an aggregated row with its expected value.
  def expect_query(row, expected)
    expected.each do |field, value|
      expect(row[field]).to eq(value)
    end
  end

  let(:instance) { described_class.new(2) }

  before do
    write_logs(
      :foo,
      {sql: "F-SQL1", caller: "CALLER1", tag: true},
      {sql: "F-SQL2", caller: "CALLER2", tag: true},
      {sql: "F-SQL1", caller: "CALLER1", tag: true},
      {sql: "F-SQL3", caller: "CALLER3", tag: true},
      {sql: "F-SQL2", caller: "CALLER2", tag: true}
    )

    write_logs(
      :bar,
      {sql: "B-SQL1", caller: "CALLER1"},
      {sql: "B-SQL2", caller: "CALLER2"},
      {sql: "B-SQL3", caller: "CALLER3"},
      {sql: "B-SQL2", caller: "CALLER2"},
      {sql: "B-SQL1", caller: "CALLER1"}
    )
  end

  # Feeds the definition logs, then the usage logs, as [prefix, path] pairs
  # and returns the processor's definitions.
  subject(:process) do
    definition_logs = Dir["#{tmp_dir}/*_*_definitions.log"].map do |path|
      [File.basename(path).split("_", 2).first, path]
    end
    instance.run_definition(definition_logs)

    usage_paths = Dir["#{tmp_dir}/*_*.log"].reject { |path| path =~ /definitions\.log$/ }
    usage_logs = usage_paths.map do |path|
      [File.basename(path).split("_", 2).first, path]
    end
    instance.run_usage(usage_logs)

    instance.definitions
  end

  it "processes logs" do
    logs = process

    expect(logs["foo"].length).to eq(3)

    foo_queries = logs["foo"].values.sort_by { |row| row["sql"] }

    expect_query(
      foo_queries[0],
      "sql" => "F-SQL1",
      "caller" => "CALLER1",
      "count" => 2,
      "last_called" => Time.utc(2015, 1, 1, 3),
      "tag" => true
    )
    expect_query(
      foo_queries[1],
      "sql" => "F-SQL2",
      "caller" => "CALLER2",
      "count" => 2,
      "last_called" => Time.utc(2015, 1, 1, 5),
      "tag" => true
    )
    expect_query(
      foo_queries[2],
      "sql" => "F-SQL3",
      "caller" => "CALLER3",
      "count" => 1,
      "last_called" => Time.utc(2015, 1, 1, 4),
      "tag" => true
    )

    expect(logs["bar"].length).to eq(3)

    bar_queries = logs["bar"].values.sort_by { |row| row["sql"] }

    expect_query(
      bar_queries[0],
      "sql" => "B-SQL1",
      "caller" => "CALLER1",
      "count" => 2,
      "last_called" => Time.utc(2015, 1, 1, 5),
      "tag" => nil
    )
    expect_query(
      bar_queries[1],
      "sql" => "B-SQL2",
      "caller" => "CALLER2",
      "count" => 2,
      "last_called" => Time.utc(2015, 1, 1, 4),
      "tag" => nil
    )
    expect_query(
      bar_queries[2],
      "sql" => "B-SQL3",
      "caller" => "CALLER3",
      "count" => 1,
      "last_called" => Time.utc(2015, 1, 1, 3),
      "tag" => nil
    )
  end

  it "dumps to disk" do
    process
    instance.dump(tmp_dir)

    # One dated dump file is expected per prefix.
    today = Time.now.strftime("%Y-%m-%d")
    paths = Dir["#{tmp_dir}/{foo,bar}_#{today}.log"]
    expect(paths.length).to eq(2)
  end
end
@@ -0,0 +1,66 @@
1
+ require "fileutils"
2
+ require_relative "../../../lib/active_record/sql_analyzer/cli"
3
+ require_relative "../../../lib/active_record/sql_analyzer/cli_processor"
4
+
5
# Checks that the CLI discovers rotated logs and hands them to its processor.
RSpec.describe ActiveRecord::SqlAnalyzer::CLI do
  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  let(:instance) do
    cli = described_class.new
    cli.parse_options(["--log-dir", tmp_dir, "--dest-dir", tmp_dir])
    cli
  end

  # Three rotations (.0, .1, .2) of a usage log and a definitions log for
  # each of the two prefixes.
  before do
    %w(foo bar).each do |prefix|
      3.times do |i|
        FileUtils.touch("#{tmp_dir}/#{prefix}.log.#{i}")
        FileUtils.touch("#{tmp_dir}/#{prefix}_definitions.log.#{i}")
      end
    end
  end

  # Asserts that +group+ (a list of [prefix, path] pairs) belongs to a single
  # prefix and contains rotations 0, 1 and 2 of "<prefix><suffix>.log".
  def expect_rotations_of_one_prefix(group, suffix)
    prefixes = group.map(&:first).uniq
    logs = group.map(&:last)

    expect(prefixes.length).to eq(1)

    3.times do |rotation|
      expect(logs).to include(/#{prefixes.first}#{suffix}\.log\.#{rotation}/)
    end
  end

  it "parses logs and starts the processor" do
    expect(instance.processor).to receive(:run_definition) do |paths|
      expect(paths.length).to eq(6)

      # Array#[start, length] takes a length, so the former paths[3, 6] only
      # worked because the array ends at index 5; slice in threes instead.
      paths.each_slice(3) do |group|
        expect_rotations_of_one_prefix(group, "_definitions")
      end
    end

    expect(instance.processor).to receive(:run_usage) do |paths|
      expect(paths.length).to eq(6)

      paths.each_slice(3) do |group|
        expect_rotations_of_one_prefix(group, "")
      end
    end

    expect(instance.processor).to receive(:dump).with(tmp_dir)

    instance.run
  end
end
@@ -0,0 +1,121 @@
1
# Runs real queries through DBConnection and inspects the usage and
# definition logs written for the :test_tag analyzer.
RSpec.describe "End to End" do
  include WaitForPop

  let(:tmp_dir) { Dir.mktmpdir }
  after { FileUtils.remove_entry(tmp_dir) }

  let(:log_path) { "#{tmp_dir}/test_tag.log" }
  let(:log_data) { File.read(log_path) }

  # Number of usage-log lines per value found after the first "|" of a line.
  let(:log_hash) do
    log_data.split("\n").each_with_object({}) do |line, counts|
      sha = line.split("|", 2).last
      counts[sha] = counts.fetch(sha, 0) + 1
    end
  end

  # count => sha, so a sha can be looked up by how often it was logged.
  let(:log_reverse_hash) { log_hash.invert }

  let(:log_def_path) { "#{tmp_dir}/test_tag_definitions.log" }
  let(:log_def_data) { File.read(log_def_path) }

  # sha => parsed JSON event, one entry per "sha|json" definition line.
  let(:log_def_hash) do
    log_def_data.split("\n").each_with_object({}) do |line, definitions|
      sha, event = line.split("|", 2)
      definitions[sha] = JSON.parse(event)
    end
  end

  before do
    ActiveRecord::SqlAnalyzer.configure do |c|
      c.logger_root_path tmp_dir
      c.log_sample_proc proc { |_name| true }

      c.add_analyzer(name: :test_tag, tables: %w(matching_table))
    end

    # Unbuffered writes, so the files can be read back right after a query.
    ActiveRecord::SqlAnalyzer.config[:analyzers].each do |analyzer|
      logger = analyzer[:logger_instance]
      logger.definition_log_file.sync = true
      logger.log_file.sync = true
    end
  end

  # Executes +sql+ and waits for the background processor before returning.
  def execute(sql)
    DBConnection.connection.execute(sql)
    wait_for_pop
  end

  it "does not log with a non-matching table" do
    execute "SELECT * FROM nonmatching_table"

    expect(log_data).to eq("")
    expect(log_def_data).to eq("")
  end

  it "logs with a matching + non-matching table in one query" do
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 1234"
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.id = 4321"
    execute "SELECT nmt.id FROM nonmatching_table AS nmt JOIN matching_table AS mt ON mt.id = nmt.id WHERE mt.test_string = 'abc'"

    expect(log_hash.length).to eq(2)

    # The two id lookups share one sha (count 2); the string lookup has its own.
    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    id_definition = log_def_hash[id_sha]
    str_definition = log_def_hash[str_sha]

    expect(id_definition["sql"]).to include("mt.id = [REDACTED]")
    expect(str_definition["sql"]).to include("mt.test_string = '[REDACTED]'")
  end

  it "logs with only a matching table in a query" do
    execute "SELECT * FROM matching_table WHERE id = 1234"
    execute "SELECT * FROM matching_table WHERE id = 4321"
    execute "SELECT * FROM matching_table WHERE test_string = 'abc'"

    expect(log_hash.length).to eq(2)

    id_sha = log_reverse_hash[2]
    str_sha = log_reverse_hash[1]

    expect(log_def_hash.length).to eq(2)

    id_definition = log_def_hash[id_sha]
    str_definition = log_def_hash[str_sha]

    expect(id_definition["sql"]).to include("id = [REDACTED]")
    expect(str_definition["sql"]).to include("test_string = '[REDACTED]'")
  end

  it "handles invalid UTF-8" do
    execute "SELECT * FROM matching_table WHERE test_string = '\xe5'"
    execute "SELECT * FROM matching_table WHERE test_string = 'foobar'"

    # Both queries are expected to collapse into a single sha logged twice.
    expect(log_reverse_hash.first.first).to eq(2)

    sha = log_reverse_hash[2]
    definition = log_def_hash[sha]
    expect(definition["sql"]).to include("test_string = '[REDACTED]'")
  end

  context "when sampling is disabled" do
    before do
      ActiveRecord::SqlAnalyzer.configure do |c|
        c.log_sample_proc proc { |_name| false }
      end
    end

    it "does not log" do
      execute "SELECT * FROM matching_table"

      expect(log_data).to eq("")
      expect(log_def_data).to eq("")
    end
  end
end