wt_activerecord_index_spy 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile ADDED
@@ -0,0 +1,60 @@
1
# frozen_string_literal: true

require "bundler/gem_tasks"
require "rspec/core/rake_task"

RSpec::Core::RakeTask.new(:spec)

require "rubocop/rake_task"

RuboCop::RakeTask.new

task default: %i[spec rubocop]

# Replace the stock Bundler push task so that `rake release` publishes the gem
# to Gemfury instead of the default destination.
Rake::Task["release:rubygem_push"].clear
desc "Pick up the .gem file from pkg/ and push it to Gemfury"
task "release:rubygem_push" do
  # IMPORTANT: You need to have the `fury` gem installed, and you need to be logged in.
  # Please DO READ about "impersonation", which is how you push to your company account instead
  # of your personal account!
  # https://gemfury.com/help/collaboration#impersonation
  paths = Dir.glob(File.join(__dir__, "pkg", "*.gem"))
  raise "Must have found only 1 .gem path, but found #{paths.inspect}" if paths.length != 1

  # `sh` with an argument list bypasses the shell (no escaping needed) and
  # raises when `fury` exits with a non-zero status, so a failed push aborts
  # the release instead of being silently ignored.
  sh "fury", "push", paths.first, "--as=wetransfer"
end
27
+
28
# Tasks that manage the databases used by the test suite. The adapter is
# selected with the ADAPTER environment variable (defaults to "mysql2").
namespace :db do
  require_relative "./spec/support/test_database"
  require "active_record"
  require "dotenv/load"
  Dotenv.load

  # Shared setup for every task below: announces the action, points the
  # database URL at the chosen adapter and opens the connection.
  # `url_options` is forwarded untouched to TestDatabase.set_env_database_url.
  prepare_connection = lambda do |progress_verb, **url_options|
    adapter = ENV.fetch("ADAPTER", "mysql2")
    puts "#{progress_verb} #{adapter}"
    TestDatabase.set_env_database_url(adapter, **url_options)
    TestDatabase.establish_connection
  end

  desc "Create databases to be used in tests"
  task "create" do
    prepare_connection.call("Creating")
    ActiveRecord::Base.connection.create_database(TestDatabase.database_name)
  end

  desc "Drop databases to be used in tests"
  task "drop" do
    prepare_connection.call("Dropping")
    ActiveRecord::Base.connection.drop_database(TestDatabase.database_name)
  end

  desc "Migrate databases to be used in tests"
  task "migrate" do
    # Migrations run inside the test database, so the URL must include its name.
    prepare_connection.call("Migrating", with_database_name: true)
    TestDatabase.run_migrations
  end
end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require "bundler/setup"
5
+ require "wt_activerecord_index_spy"
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ # (If you use this, don't forget to add pry to your Gemfile!)
11
+ # require "pry"
12
+ # Pry.start
13
+
14
+ require "irb"
15
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "wt_activerecord_index_spy/version"
4
+ require_relative "wt_activerecord_index_spy/aggregator"
5
+ require_relative "wt_activerecord_index_spy/query_analyser"
6
+ require_relative "wt_activerecord_index_spy/query_analyser/mysql"
7
+ require_relative "wt_activerecord_index_spy/query_analyser/postgres"
8
+ require_relative "wt_activerecord_index_spy/notification_listener"
9
+ require "logger"
10
+
11
+ # This is the top level module which requires everything
12
# This is the top level module which requires everything.
# It exposes the public API of the gem: start watching queries, read the
# collected results and export them as an HTML report.
module WtActiverecordIndexSpy
  extend self

  # Logger used for debug output. `boot` sets it to a logger that discards
  # everything; assign another logger to see the messages.
  attr_accessor :logger

  # Memoized default Aggregator shared by the module-level helpers.
  def aggregator
    @aggregator ||= Aggregator.new
  end

  # Memoized default QueryAnalyser.
  def query_analyser
    @query_analyser ||= QueryAnalyser.new
  end

  # Resets the aggregator and subscribes a NotificationListener to the
  # "sql.active_record" notifications.
  #
  # Without a block the subscription stays active and nil is returned.
  # With a block the subscription only lives for the duration of the block and
  # is removed afterwards, even when the block raises; the value of the block
  # is returned.
  # rubocop:disable Metrics/MethodLength
  def watch_queries(
    aggregator: self.aggregator,
    ignore_queries_originated_in_test_code: true,
    query_analyser: self.query_analyser
  )
    aggregator.reset

    notification_listener = NotificationListener.new(
      aggregator: aggregator,
      ignore_queries_originated_in_test_code: ignore_queries_originated_in_test_code,
      query_analyser: query_analyser
    )

    subscriber = ActiveSupport::Notifications
                 .subscribe("sql.active_record", notification_listener)

    return unless block_given?

    begin
      yield
    ensure
      # Always unsubscribe, otherwise an exception in the block would leave the
      # listener attached and it would keep analysing unrelated queries.
      ActiveSupport::Notifications.unsubscribe(subscriber)
    end
  end
  # rubocop:enable Metrics/MethodLength

  # Delegates to the default aggregator's export_html_results.
  def export_html_results(file = nil, stdout: $stdout)
    aggregator.export_html_results(file, stdout: stdout)
  end

  # Delegates to the default aggregator's certain_results.
  def certain_results
    aggregator.certain_results
  end

  # Delegates to the default aggregator's results.
  def results
    aggregator.results
  end

  # Clears everything collected by the default aggregator.
  def reset_results
    aggregator.reset
  end

  # Installs a logger that discards all output. File::NULL is used instead of
  # a hard-coded "/dev/null" so it also works on platforms without that path.
  def boot
    @logger = Logger.new(File::NULL)
  end
end

WtActiverecordIndexSpy.boot
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "erb"
4
+ require "tmpdir"
5
+
6
module WtActiverecordIndexSpy
  # This class aggregates all queries that were considered not using index.
  # Since it's not possible to be sure for every query, it separates the result
  # in certains and uncertains.
  class Aggregator
    # Hash of query string => Item for every query added since the last reset.
    attr_reader :results

    Item = Struct.new(:identifier, :query, :origin, :certainity_level, keyword_init: true)

    def initialize
      reset
    end

    # Forgets every collected item.
    def reset
      @results = {}
    end

    # Stores the item keyed by its query, so adding the same query again
    # replaces the previous entry.
    # item: an instance of Aggregator::Item
    def add(item)
      @results[item.query] = item
    end

    # Items whose certainity_level is :certain.
    def certain_results
      results_with_level(:certain)
    end

    # Items whose certainity_level is :uncertain.
    def uncertain_results
      results_with_level(:uncertain)
    end

    # Renders the HTML report and writes it to `file`.
    # file: an object responding to write, close and path; when nil a file in
    #       the system temporary directory is used.
    # stdout: where the "Report exported" message is printed.
    # The file is closed even when writing fails.
    def export_html_results(file, stdout: $stdout)
      # Render first so a template error does not leave a freshly opened
      # default file behind.
      content = render_html_report
      file ||= default_html_output_file

      begin
        file.write(content)
      ensure
        file.close
      end

      stdout.puts "Report exported to #{file.path}"
    end

    private

    # Collected items filtered by the given certainity level.
    def results_with_level(level)
      @results.values.select { |item| item.certainity_level == level }
    end

    # Renders results.html.erb (located next to this file) with the current
    # results. The trim mode is passed as a keyword because the positional
    # form of ERB.new is deprecated.
    def render_html_report
      template = File.read(File.join(__dir__, "results.html.erb"))

      ERB.new(template, trim_mode: "-")
         .result_with_hash(certain_results: certain_results, uncertain_results: uncertain_results)
    end

    def default_html_output_file
      File.new(
        File.join(Dir.tmpdir, "wt_activerecord_index_spy-results.html"),
        "w"
      )
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  MissingIndex = Class.new(StandardError)

  # This class can be used to subscribe to an activerecord "sql.active_record"
  # notification.
  # It gets each query that uses a WHERE statement and runs a EXPLAIN query to
  # see if it uses an index.
  class NotificationListener
    IGNORED_SQL = [
      /^PRAGMA (?!(table_info))/,
      /^SELECT currval/,
      /^SELECT CAST/,
      /^SELECT @@IDENTITY/,
      /^SELECT @@ROWCOUNT/,
      /^SAVEPOINT/,
      /^ROLLBACK TO SAVEPOINT/,
      /^RELEASE SAVEPOINT/,
      /^SHOW max_identifier_length/,
      /^SELECT @@FOREIGN_KEY_CHECKS/,
      /^SET FOREIGN_KEY_CHECKS/,
      /^TRUNCATE TABLE/,
      /^EXPLAIN/
    ].freeze

    attr_reader :queries_missing_index

    # ignore_queries_originated_in_test_code: when truthy, queries whose origin
    #   looks like a spec/ or test/ file are skipped.
    # aggregator: receives an Aggregator::Item for every reported query.
    # query_analyser: object whose `analyse` returns a certainity level or nil.
    def initialize(ignore_queries_originated_in_test_code:,
                   aggregator: Aggregator.new,
                   query_analyser: QueryAnalyser.new)
      @queries_missing_index = []
      @aggregator = aggregator
      @query_analyser = query_analyser
      @ignore_queries_originated_in_test_code = ignore_queries_originated_in_test_code
    end

    # Callback with the signature used for ActiveSupport::Notifications
    # subscribers. `values` is the notification payload; :sql, :name and
    # optionally :connection and :binds are read from it.
    # TODO: refactor me pls to remove all these Rubocop warnings!
    # rubocop:disable Metrics/AbcSize
    # rubocop:disable Metrics/MethodLength
    def call(_name, _start, _finish, _message_id, values)
      query = values[:sql]
      logger.debug "query: #{query}"
      identifier = values[:name]

      if ignore_query?(query: query, name: identifier)
        logger.debug "query type ignored"
        return
      end
      logger.debug "query type accepted"

      # Hopefully, it will get the line which executed the query.
      # It ignores activerecord, activesupport and other gem frames.
      # Maybe there is a better way to achieve it.
      # It is nil when every frame lives under a "/gems/" path.
      origin = caller.find { |line| !line.include?("/gems/") }
      if @ignore_queries_originated_in_test_code && query_originated_in_tests?(origin)
        logger.debug "origin ignored: #{origin}"
        return
      end

      logger.debug "origin accepted: #{origin}"

      certainity_level = @query_analyser.analyse(**values.slice(:sql, :connection, :binds))
      return unless certainity_level

      item = Aggregator::Item.new(
        identifier: identifier,
        query: query,
        origin: reduce_origin(origin),
        certainity_level: certainity_level
      )

      @aggregator.add(item)
    end
    # rubocop:enable Metrics/AbcSize
    # rubocop:enable Metrics/MethodLength

    private

    # TODO: Find a better way to detect if the origin is a test file
    # An unknown (nil) origin is not treated as test code.
    def query_originated_in_tests?(origin)
      return false unless origin

      origin.include?("spec/") || origin.include?("test/")
    end

    def ignore_query?(name:, query:)
      # FIXME: this seems bad. we should probably have a better way to indicate
      # the query was cached
      name == "CACHE" ||
        name == "SCHEMA" ||
        !name ||
        !query.downcase.include?("where") ||
        IGNORED_SQL.any? { |pattern| query.match?(pattern) }
    end

    # Shortens a backtrace line to "<parent dir>/<file>:<line>".
    # The ":in ..." label is stripped by matching the line number instead of
    # cutting at the last ":", because labels may contain "::" themselves
    # (e.g. "in 'Foo::Bar#baz'"). Returns nil for a nil origin.
    def reduce_origin(origin)
      return unless origin

      location = origin[/\A(.*?:\d+)(?::in\b|\z)/, 1] || origin
      location.split("/").last(2).join("/")
    end

    def logger
      WtActiverecordIndexSpy.logger
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  # It runs an EXPLAIN query given a query and analyses the result to see if
  # some index is missing.
  class QueryAnalyser
    def initialize
      # This is a cache to not run the same EXPLAIN again
      # It sets the query as key and the result (certain, uncertain) as the value
      @analysed_queries = {}
    end

    # Runs "EXPLAIN <sql>" and returns what the adapter-specific analyser
    # returns for it; the result is cached per sql string.
    #
    # sql: the query to explain.
    # connection: used to pick the adapter-specific analyser.
    # binds: bind values forwarded to the EXPLAIN query.
    # Raises NotImplementedError for adapters without an analyser.
    #
    # The sql and binds vary depend on the adapter.
    # - Mysql2: sends sql complete and binds = []
    # - Postgres: sends sql in a form of prepared statement and its values in binds
    # rubocop:disable Metrics/MethodLength
    def analyse(sql:, connection: ActiveRecord::Base.connection, binds: [])
      query = sql
      # TODO: this could be more intelligent to not duplicate similar queries
      # with different WHERE values, example:
      # - WHERE lala = 1 AND popo = 1
      # - WHERE lala = 2 AND popo = 2
      # Notes:
      # - The Postgres adapter uses prepared statements as default, so it
      # will save the queries without the values.
      # - The Mysql2 adapter does not use prepared statements as default, so it
      # will analyse very similar queries as described above.
      return @analysed_queries[query] if @analysed_queries.key?(query)

      adapter = select_adapter(connection)

      # We need a thread to use a different connection that it's used by the
      # application otherwise, it can change some ActiveRecord internal state
      # such as number_of_affected_rows that is returned by the method
      # `update_all`
      # NOTE(review): the EXPLAIN runs on ActiveRecord::Base's pool, not on the
      # pool of `connection` - confirm this is intended for multi-database apps.
      # Thread#value waits for the thread to finish before returning its result.
      Thread.new do
        results = ActiveRecord::Base.connection_pool.with_connection do |conn|
          conn.exec_query("EXPLAIN #{query}", "SQL", binds)
        end

        adapter.analyse(results).tap do |certainity_level|
          @analysed_queries[query] = certainity_level
        end
      end.value
    end
    # rubocop:enable Metrics/MethodLength

    private

    # Maps the adapter name of the given connection to its analyser module.
    def select_adapter(connection)
      adapter_name = connection.adapter_name

      case adapter_name
      when "Mysql2"
        QueryAnalyser::Mysql
      when "PostgreSQL"
        QueryAnalyser::Postgres
      else
        # Report the adapter of the connection that was actually passed in.
        raise NotImplementedError, "adapter: #{adapter_name}"
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  class QueryAnalyser
    # It analyses the result of an EXPLAIN query to see if any index is missing.
    module Mysql
      extend self

      ALLOWED_EXTRA_VALUES = [
        # https://bugs.mysql.com/bug.php?id=64197
        "Impossible WHERE noticed after reading const tables",
        "no matching row"
      ].freeze

      # results: an enumerable of EXPLAIN rows, each responding to `fetch` with
      #          the keys "type", "possible_keys", "key" and "Extra".
      # Returns the certainity level (:certain or :uncertain) of the first row
      # that looks like it is missing an index, or nil when no row does.
      def analyse(results)
        results.each do |result|
          certainity_level = analyse_explain(result)
          return certainity_level if certainity_level
        end

        nil
      end

      private

      # Classifies a single EXPLAIN row; nil means nothing to report.
      # rubocop: disable Metrics/CyclomaticComplexity
      # rubocop: disable Metrics/PerceivedComplexity
      def analyse_explain(result)
        type = result.fetch("type")
        possible_keys = result.fetch("possible_keys")
        key = result.fetch("key")
        extra = result.fetch("Extra")

        # more details about the result in https://dev.mysql.com/doc/refman/8.0/en/explain-output.html
        return if type == "ref"
        return if ALLOWED_EXTRA_VALUES.any? { |value| extra&.include?(value) }

        # No candidate index at all.
        return :certain if possible_keys.nil?
        # Only the primary key was a candidate, it was not used and the whole
        # table is scanned.
        return :uncertain if possible_keys == "PRIMARY" && key.nil? && type == "ALL"

        nil
      end
      # rubocop: enable Metrics/CyclomaticComplexity
      # rubocop: enable Metrics/PerceivedComplexity
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  class QueryAnalyser
    # It analyses the result of an EXPLAIN query to see if any index is missing.
    module Postgres
      extend self

      # results: the EXPLAIN result; only `rows` is read from it.
      # Returns :uncertain when the plan contains a sequential scan, otherwise nil.
      def analyse(results)
        rows = results.rows

        # Block form: the message is only built when the logger actually emits
        # debug output.
        WtActiverecordIndexSpy.logger.debug { "results:\n#{rows.join("\n")}" }

        full_results = rows.join(", ").downcase

        # rubocop:disable Layout/LineLength
        # Postgres sometimes uses a "seq scan" even for queries that could use an index.
        # So it's almost impossible to be certain if an index is missing!
        # The result of the EXPLAIN query varies depending on the state of the database
        # because Postgres collects statistics from tables and decide if it's better
        # using an index or not based on that.
        # This is an example in a real application:
        #
        # [1] pry(main)> Feature.where(plan_id: 312312).explain
        # Feature Load (4.0ms) SELECT "features".* FROM "features" WHERE "features"."plan_id" = $1 [["plan_id", 312312]]
        # => EXPLAIN for: SELECT "features".* FROM "features" WHERE "features"."plan_id" = $1 [["plan_id", 312312]]
        # QUERY PLAN
        # ---------------------------------------------------------
        # Seq Scan on features (cost=0.00..1.06 rows=1 width=72)
        # Filter: (plan_id = 312312)
        # (2 rows)
        #
        # [2] pry(main)> Feature.count
        # (2.8ms) SELECT COUNT(*) FROM "features"
        # => 5
        # [3] pry(main)> Plan.count
        # (2.7ms) SELECT COUNT(*) FROM "plans"
        # => 2
        #
        ####################################################################################################################
        #
        # [1] pry(main)> Feature.where(plan_id: 312312).explain
        # Feature Load (2.3ms) SELECT "features".* FROM "features" WHERE "features"."plan_id" = $1 [["plan_id", 312312]]
        # => EXPLAIN for: SELECT "features".* FROM "features" WHERE "features"."plan_id" = $1 [["plan_id", 312312]]
        # QUERY PLAN
        # ----------------------------------------------------------------------------------------
        # Bitmap Heap Scan on features (cost=4.18..12.64 rows=4 width=72)
        # Recheck Cond: (plan_id = 312312)
        # -> Bitmap Index Scan on index_features_on_plan_id (cost=0.00..4.18 rows=4 width=0)
        # Index Cond: (plan_id = 312312)
        # rubocop:enable Layout/LineLength
        full_results.include?("seq scan on") ? :uncertain : nil
      end
    end
  end
end