wt_activerecord_index_spy 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile ADDED
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rspec/core/rake_task"
5
+
6
# Defines the `spec` task, which runs the RSpec suite.
RSpec::Core::RakeTask.new(:spec)

require "rubocop/rake_task"

# Defines the RuboCop lint task under its default name.
RuboCop::RakeTask.new

# A bare `rake` depends on both the spec and the rubocop task.
task default: [:spec, :rubocop]
13
+
14
# Clear the `release:rubygem_push` task that is already defined (this Rakefile
# loads "bundler/gem_tasks") and redefine it so that the built gem is pushed
# to Gemfury.
Rake::Task["release:rubygem_push"].clear
desc "Pick up the .gem file from pkg/ and push it to Gemfury"
task "release:rubygem_push" do
  # IMPORTANT: You need to have the `fury` gem installed, and you need to be logged in.
  # Please DO READ about "impersonation", which is how you push to your company account instead
  # of your personal account!
  # https://gemfury.com/help/collaboration#impersonation
  paths = Dir.glob("#{__dir__}/pkg/*.gem")
  raise "Must have found only 1 .gem path, but found #{paths.inspect}" if paths.length != 1

  # The argument-list form of Rake's `sh` runs the command without a shell, so
  # the path needs no escaping. It also raises when `fury` exits with a
  # non-zero status, so a failed push aborts the release instead of passing
  # unnoticed.
  sh "fury", "push", paths.first, "--as=wetransfer"
end
27
+
28
# Tasks that prepare the databases used by the test suite. Each task reads the
# adapter name from the ADAPTER environment variable, defaulting to "mysql2".
namespace :db do
  require_relative "./spec/support/test_database"
  require "active_record"
  require "dotenv/load"
  Dotenv.load

  desc "Create databases to be used in tests"
  task "create" do
    db_adapter = ENV.fetch("ADAPTER", "mysql2")
    puts "Creating #{db_adapter}"
    TestDatabase.set_env_database_url(db_adapter)
    TestDatabase.establish_connection

    connection = ActiveRecord::Base.connection
    connection.create_database(TestDatabase.database_name)
  end

  desc "Drop databases to be used in tests"
  task "drop" do
    db_adapter = ENV.fetch("ADAPTER", "mysql2")
    puts "Dropping #{db_adapter}"
    TestDatabase.set_env_database_url(db_adapter)
    TestDatabase.establish_connection

    connection = ActiveRecord::Base.connection
    connection.drop_database(TestDatabase.database_name)
  end

  desc "Migrate databases to be used in tests"
  task "migrate" do
    db_adapter = ENV.fetch("ADAPTER", "mysql2")
    puts "Migrating #{db_adapter}"
    # Unlike create/drop, the URL is built with the database name included.
    TestDatabase.set_env_database_url(db_adapter, with_database_name: true)
    TestDatabase.establish_connection
    TestDatabase.run_migrations
  end
end
data/bin/console ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

# Starts an IRB session with the gem already required.
require "bundler/setup"
require "wt_activerecord_index_spy"

# Fixtures and/or initialization code can be added here to make experimenting
# with the gem easier.

# A different console can be used instead. For Pry, add pry to the Gemfile and
# replace the two lines below with:
#   require "pry"
#   Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
#!/usr/bin/env bash
# Abort on command errors (-e), on unset variables (-u) and on a failure in
# any stage of a pipeline (pipefail).
set -e -u -o pipefail
# Split words on newlines and tabs only.
IFS=$'\n\t'
# Print each input line as it is read (-v) and each command as it runs (-x).
set -v -x

bundle install

# Do any other automated setup that you need to do here
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "wt_activerecord_index_spy/version"
4
+ require_relative "wt_activerecord_index_spy/aggregator"
5
+ require_relative "wt_activerecord_index_spy/query_analyser"
6
+ require_relative "wt_activerecord_index_spy/query_analyser/mysql"
7
+ require_relative "wt_activerecord_index_spy/query_analyser/postgres"
8
+ require_relative "wt_activerecord_index_spy/notification_listener"
9
+ require "logger"
10
+
11
# Top-level module of the gem. It memoizes a default Aggregator and
# QueryAnalyser, subscribes a NotificationListener to ActiveRecord's SQL
# notifications and exposes the aggregated results.
module WtActiverecordIndexSpy
  extend self

  # Logger used for the gem's debug messages. `boot` installs one that writes
  # to the null device; assign another logger to see the output.
  attr_accessor :logger

  # Returns the memoized default Aggregator.
  def aggregator
    @aggregator ||= Aggregator.new
  end

  # Returns the memoized default QueryAnalyser.
  def query_analyser
    @query_analyser ||= QueryAnalyser.new
  end

  # Resets the aggregator and subscribes a NotificationListener to the
  # "sql.active_record" notifications.
  #
  # With a block: yields, then removes the subscription again, even when the
  # block raises, and returns the block's value.
  # Without a block: the subscription is left in place and nil is returned.
  #
  # aggregator:     receives `reset` and is handed to the listener
  # ignore_queries_originated_in_test_code: forwarded to the listener
  # query_analyser: forwarded to the listener
  # rubocop:disable Metrics/MethodLength
  def watch_queries(
    aggregator: self.aggregator,
    ignore_queries_originated_in_test_code: true,
    query_analyser: self.query_analyser
  )
    aggregator.reset

    notification_listener = NotificationListener.new(
      aggregator: aggregator,
      ignore_queries_originated_in_test_code: ignore_queries_originated_in_test_code,
      query_analyser: query_analyser
    )

    subscriber = ActiveSupport::Notifications
                 .subscribe("sql.active_record", notification_listener)

    return unless block_given?

    begin
      yield
    ensure
      # Always unsubscribe; otherwise a failing block would leave the listener
      # attached and it would keep analysing every later query.
      ActiveSupport::Notifications.unsubscribe(subscriber)
    end
  end
  # rubocop:enable Metrics/MethodLength

  # Delegates to the default aggregator's HTML export.
  def export_html_results(file = nil, stdout: $stdout)
    aggregator.export_html_results(file, stdout: stdout)
  end

  # Delegates to the default aggregator's `certain_results`.
  def certain_results
    aggregator.certain_results
  end

  # Delegates to the default aggregator's `results`.
  def results
    aggregator.results
  end

  # Resets the default aggregator.
  def reset_results
    aggregator.reset
  end

  # Installs the default, silent logger. File::NULL is the platform's null
  # device, so this also works where "/dev/null" does not exist.
  def boot
    @logger = Logger.new(File::NULL)
  end
end

WtActiverecordIndexSpy.boot
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "erb"
4
+ require "tmpdir"
5
+
6
module WtActiverecordIndexSpy
  # Collects the queries that were considered not to be using an index.
  # Since it is not possible to be sure for every query, the results are split
  # into "certain" and "uncertain" ones.
  class Aggregator
    # Hash mapping each query string to the Item stored for it.
    attr_reader :results

    Item = Struct.new(:identifier, :query, :origin, :certainity_level, keyword_init: true)

    def initialize
      reset
    end

    # Discards everything collected so far.
    def reset
      @results = {}
    end

    # Stores the item under its query string; an item added later for the same
    # query replaces the earlier one.
    #
    # item: an instance of Aggregator::Item
    def add(item)
      @results[item.query] = item
    end

    # Returns the items whose certainity_level is :certain.
    def certain_results
      results_with_level(:certain)
    end

    # Returns the items whose certainity_level is :uncertain.
    def uncertain_results
      results_with_level(:uncertain)
    end

    # Renders the HTML report, writes it to `file` and closes the file, then
    # announces the file's path on `stdout`.
    #
    # file:   object responding to write, close and path; when nil, a file in
    #         the system temporary directory is created
    # stdout: receives the "Report exported to ..." line
    def export_html_results(file, stdout: $stdout)
      # Render before opening the default file so that a template error does
      # not truncate a previously exported report.
      content = render_html_report
      file ||= default_html_output_file

      begin
        file.write(content)
      ensure
        # Close the handle even when the write fails.
        file.close
      end
      stdout.puts "Report exported to #{file.path}"
    end

    private

    # Returns the collected items that carry the given certainity level.
    def results_with_level(level)
      @results.each_value.select { |item| item.certainity_level == level }
    end

    # Renders results.html.erb (located next to this file) with both result lists.
    def render_html_report
      template = File.read(File.join(__dir__, "results.html.erb"))

      # `trim_mode:` replaces the positional safe_level/trim_mode arguments,
      # which have been deprecated since Ruby 2.6.
      ERB.new(template, trim_mode: "-")
         .result_with_hash(certain_results: certain_results, uncertain_results: uncertain_results)
    end

    # Opens the default report file for writing.
    def default_html_output_file
      File.new(
        File.join(Dir.tmpdir, "wt_activerecord_index_spy-results.html"),
        "w"
      )
    end
  end
end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  MissingIndex = Class.new(StandardError)

  # This class can be used to subscribe to an activerecord "sql.active_record"
  # notification.
  # It takes each query that contains a WHERE clause, asks the query analyser
  # (which runs an EXPLAIN) whether an index seems to be missing, and records
  # the flagged queries in the aggregator.
  class NotificationListener
    IGNORED_SQL = [
      /^PRAGMA (?!(table_info))/,
      /^SELECT currval/,
      /^SELECT CAST/,
      /^SELECT @@IDENTITY/,
      /^SELECT @@ROWCOUNT/,
      /^SAVEPOINT/,
      /^ROLLBACK TO SAVEPOINT/,
      /^RELEASE SAVEPOINT/,
      /^SHOW max_identifier_length/,
      /^SELECT @@FOREIGN_KEY_CHECKS/,
      /^SET FOREIGN_KEY_CHECKS/,
      /^TRUNCATE TABLE/,
      /^EXPLAIN/
    ].freeze

    # Matches "spec/" or "test/" only as a whole path segment, so directories
    # such as "latest/" or "contest/" are not mistaken for test code.
    TEST_DIRECTORY = %r{(?:\A|/)(?:spec|test)/}.freeze
    private_constant :TEST_DIRECTORY

    attr_reader :queries_missing_index

    # ignore_queries_originated_in_test_code: when truthy, queries whose
    #   origin lies in a spec/ or test/ directory are skipped
    # aggregator:     receives an Aggregator::Item for every flagged query
    # query_analyser: its `analyse` result decides whether a query is flagged
    def initialize(ignore_queries_originated_in_test_code:,
                   aggregator: Aggregator.new,
                   query_analyser: QueryAnalyser.new)
      @queries_missing_index = []
      @aggregator = aggregator
      @query_analyser = query_analyser
      @ignore_queries_originated_in_test_code = ignore_queries_originated_in_test_code
    end

    # Notification callback. Only `values` is used: :sql and :name are read
    # here, and :sql, :connection and :binds are forwarded to the analyser.
    #
    # TODO: refactor me pls to remove all these Rubocop warnings!
    # rubocop:disable Metrics/AbcSize
    # rubocop:disable Metrics/MethodLength
    def call(_name, _start, _finish, _message_id, values)
      query = values[:sql]
      logger.debug "query: #{query}"
      identifier = values[:name]

      if ignore_query?(query: query, name: identifier)
        logger.debug "query type ignored"
        return
      end
      logger.debug "query type accepted"

      # Hopefully, this gets the line which executed the query.
      # It skips activerecord, activesupport and other gem frames.
      # Maybe there is a better way to achieve it. The result is nil when
      # every frame of the backtrace comes from a gem.
      origin = caller.find { |line| !line.include?("/gems/") }
      if @ignore_queries_originated_in_test_code && query_originated_in_tests?(origin)
        logger.debug "origin ignored: #{origin}"
        return
      end

      logger.debug "origin accepted: #{origin}"

      certainity_level = @query_analyser.analyse(**values.slice(:sql, :connection, :binds))
      return unless certainity_level

      item = Aggregator::Item.new(
        identifier: identifier,
        query: query,
        origin: reduce_origin(origin),
        certainity_level: certainity_level
      )

      @aggregator.add(item)
    end
    # rubocop:enable Metrics/AbcSize
    # rubocop:enable Metrics/MethodLength

    private

    # True when the origin frame lies inside a spec/ or test/ directory.
    # An unknown (nil) origin is not treated as test code.
    #
    # TODO: Find a better way to detect if the origin is a test file
    def query_originated_in_tests?(origin)
      return false unless origin

      origin.match?(TEST_DIRECTORY)
    end

    # True for cached and schema queries, unnamed queries, queries without a
    # WHERE clause and anything matching IGNORED_SQL.
    def ignore_query?(name:, query:)
      # FIXME: this seems bad. we should probably have a better way to indicate
      # the query was cached
      name == "CACHE" ||
        name == "SCHEMA" ||
        !name ||
        !query.match?(/where/i) ||
        IGNORED_SQL.any? { |pattern| query.match?(pattern) }
    end

    # Shortens a backtrace line such as
    #   "/app/models/user.rb:10:in `find'"
    # to its last two path segments plus the line number: "models/user.rb:10".
    # Returns nil for a nil origin.
    def reduce_origin(origin)
      return unless origin

      # Drop the ":in ..." suffix as a whole. Cutting at the last colon breaks
      # for labels that contain one themselves, e.g. "in `<class:User>'".
      location = origin.sub(/:in .*\z/m, "")

      # `last(2)` also copes with paths that have a single segment.
      location.split("/").last(2).join("/")
    end

    def logger
      WtActiverecordIndexSpy.logger
    end
  end
end
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  # It runs an EXPLAIN query for a given query and analyses the result to see
  # if some index is missing.
  class QueryAnalyser
    def initialize
      # Cache that avoids running the same EXPLAIN again.
      # Key: the query string; value: the adapter's verdict for it.
      @analysed_queries = {}
    end

    # Returns the verdict of the adapter-specific analyser for `sql`; the
    # verdict is cached per query string.
    #
    # The sql and binds vary depending on the adapter.
    # - Mysql2: sends the complete sql and binds = []
    # - Postgres: sends the sql in the form of a prepared statement and its
    #   values in binds
    #
    # sql:        the query to explain
    # connection: only used to pick the analyser via its adapter_name
    # binds:      bind values passed along with the EXPLAIN query
    #
    # Raises NotImplementedError for adapters other than Mysql2 and PostgreSQL.
    def analyse(sql:, connection: ActiveRecord::Base.connection, binds: [])
      # TODO: this could be more intelligent to not duplicate similar queries
      # with different WHERE values, example:
      # - WHERE lala = 1 AND popo = 1
      # - WHERE lala = 2 AND popo = 2
      # Notes:
      # - The Postgres adapter uses prepared statements as default, so it
      #   will save the queries without the values.
      # - The Mysql2 adapter does not use prepared statements as default, so it
      #   will analyse very similar queries as described above.
      return @analysed_queries[sql] if @analysed_queries.key?(sql)

      adapter = select_adapter(connection)

      # We need a thread so that a connection different from the one used by
      # the application is checked out. Otherwise the EXPLAIN can change some
      # ActiveRecord internal state, such as the number of affected rows that
      # is returned by the method `update_all`.
      # `Thread#value` waits for the thread and re-raises its exception, so no
      # separate `join` is needed.
      certainity_level = Thread.new do
        results = ActiveRecord::Base.connection_pool.with_connection do |conn|
          conn.exec_query("EXPLAIN #{sql}", "SQL", binds)
        end

        adapter.analyse(results)
      end.value

      @analysed_queries[sql] = certainity_level
    end

    private

    # Maps the adapter name of the given connection to its analyser module.
    def select_adapter(connection)
      adapter_name = connection.adapter_name

      case adapter_name
      when "Mysql2"
        QueryAnalyser::Mysql
      when "PostgreSQL"
        QueryAnalyser::Postgres
      else
        # Report the adapter of the connection that was actually inspected,
        # not the one of the global ActiveRecord::Base connection.
        raise NotImplementedError, "adapter: #{adapter_name}"
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  class QueryAnalyser
    # Inspects the rows of a MySQL EXPLAIN result to decide whether an index
    # seems to be missing.
    module Mysql
      extend self

      ALLOWED_EXTRA_VALUES = [
        # https://bugs.mysql.com/bug.php?id=64197
        "Impossible WHERE noticed after reading const tables",
        "no matching row"
      ].freeze

      # Returns the verdict (:certain or :uncertain) of the first EXPLAIN row
      # that yields one, or nil when no row does.
      def analyse(results)
        results.each do |row|
          verdict = analyse_explain(row)
          return verdict if verdict
        end

        nil
      end

      private

      # Verdict for a single EXPLAIN row: nil, :certain or :uncertain.
      # The row must provide the "type", "possible_keys", "key" and "Extra"
      # entries.
      #
      # rubocop: disable Metrics/CyclomaticComplexity
      # rubocop: disable Metrics/PerceivedComplexity
      def analyse_explain(result)
        access_type = result.fetch("type")
        candidate_keys = result.fetch("possible_keys")
        chosen_key = result.fetch("key")
        extra = result.fetch("Extra")

        # The columns are described in
        # https://dev.mysql.com/doc/refman/8.0/en/explain-output.html
        return if access_type == "ref"
        return if extra && ALLOWED_EXTRA_VALUES.any? { |allowed| extra.include?(allowed) }

        if candidate_keys.nil?
          :certain
        elsif candidate_keys == "PRIMARY" && chosen_key.nil? && access_type == "ALL"
          :uncertain
        end
      end
      # rubocop: enable Metrics/CyclomaticComplexity
      # rubocop: enable Metrics/PerceivedComplexity
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
module WtActiverecordIndexSpy
  class QueryAnalyser
    # Inspects the plan returned by a Postgres EXPLAIN to decide whether an
    # index might be missing.
    module Postgres
      extend self

      # Returns :uncertain when the plan contains a sequential scan and nil
      # otherwise.
      #
      # Postgres sometimes uses a "seq scan" even for queries that could use an
      # index, so it is almost impossible to be certain that an index is
      # missing. The plan depends on the state of the database: Postgres
      # collects statistics from the tables and decides, based on them,
      # whether using an index is worth it.
      #
      # Example from a real application with 5 features and 2 plans, where the
      # query is `Feature.where(plan_id: 312312)`:
      #
      #   Seq Scan on features  (cost=0.00..1.06 rows=1 width=72)
      #     Filter: (plan_id = 312312)
      #
      # The same query in another database state:
      #
      #   Bitmap Heap Scan on features  (cost=4.18..12.64 rows=4 width=72)
      #     Recheck Cond: (plan_id = 312312)
      #     ->  Bitmap Index Scan on index_features_on_plan_id  (cost=0.00..4.18 rows=4 width=0)
      #           Index Cond: (plan_id = 312312)
      def analyse(results)
        plan_rows = results.rows
        WtActiverecordIndexSpy.logger.debug("results:\n#{plan_rows.join("\n")}")

        plan_text = plan_rows.join(", ").downcase
        plan_text.include?("seq scan on") ? :uncertain : nil
      end
    end
  end
end