benedictus 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Heuristic that reports Sort nodes whose sort method string contains
    # "external".
    class ExternalSort < Base
      # Accepts any keyword options and ignores them; nothing is forwarded
      # to the parent initializer.
      def initialize(**)
        super()
      end

      # Inspects a single plan node.
      #
      # @param node [#node_type, #sort_method, #sort_space_used, #sort_space_type]
      # @return [Array] one critical warning when the node is an external
      #   sort, otherwise an empty array
      def apply(node)
        external = node.node_type == "Sort" && node.sort_method.to_s.include?("external")
        return [] unless external

        # The size note is appended only when the plan reports how much
        # space was used; the space type falls back to "disk".
        used = node.sort_space_used
        space = used ? " (#{used} kB on #{node.sort_space_type || "disk"})" : ""

        # `warning` is not defined in this class (presumably provided by Base).
        [
          warning(
            severity: :critical,
            code: :external_sort,
            message: "Sort spilled to disk via `#{node.sort_method}`#{space}.",
            suggestion: "Increase `work_mem` or add an index that already orders the data."
          )
        ]
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Heuristic that reports Nested Loop nodes whose inner child ran more
    # than a configurable number of loops.
    class NestedLoopBlowup < Base
      DEFAULT_THRESHOLD = 10_000

      # Configuration key under which a custom threshold is looked up.
      #
      # @return [Symbol]
      def self.config_key
        :nested_loop_threshold
      end

      # @param threshold [Numeric] loop count that must be exceeded to warn
      def initialize(threshold: DEFAULT_THRESHOLD)
        super()
        @threshold = threshold
      end

      # Inspects a single plan node.
      #
      # @param node [#node_type, #children]
      # @return [Array] one warning when the inner side looped more than the
      #   threshold, otherwise an empty array
      def apply(node)
        return [] if node.node_type != "Nested Loop"

        # Prefer the child explicitly marked "Inner"; otherwise fall back to
        # the last child.
        kids = node.children
        inner = kids.find { |child| child.parent_relationship == "Inner" } || kids.last
        loops = inner&.actual_loops
        return [] unless loops && loops > @threshold

        # `warning` is not defined in this class (presumably provided by Base).
        [
          warning(
            severity: :warning,
            code: :nested_loop_blowup,
            message: "Nested Loop drove #{loops} iterations on the inner side.",
            suggestion: "Consider a Hash Join or Merge Join — try increasing `work_mem` or rewriting the join."
          )
        ]
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Holds a list of heuristic instances and runs every one of them against
    # every node of a plan tree.
    class Registry
      # Heuristic classes used when the caller does not supply its own list.
      DEFAULT_HEURISTICS = [
        SeqScanOnLargeTable,
        RowEstimateDrift,
        ExternalSort,
        NestedLoopBlowup,
        ExpensivePerRowScan
      ].freeze

      # @param heuristics [Array, nil] heuristic classes and/or ready-made
      #   instances; nil selects DEFAULT_HEURISTICS
      # @param config [Hash] threshold overrides, keyed by each heuristic
      #   class's `config_key`
      def initialize(heuristics = nil, config: {})
        source = heuristics || DEFAULT_HEURISTICS
        @heuristics = source.map { |h| instantiate(h, config) }
      end

      # Appends the warnings produced by every heuristic to each node's
      # `warnings` collection.
      #
      # @param tree [#each_node]
      # @return [Object] the same tree that was passed in
      def annotate(tree)
        tree.each_node do |node|
          @heuristics.each do |heuristic|
            node.warnings.concat(heuristic.apply(node))
          end
        end
        tree
      end

      # Convenience wrapper: annotate with the default heuristics.
      #
      # @param tree [#each_node]
      # @param config [Hash] threshold overrides
      # @return [Object] the annotated tree
      def self.annotate(tree, config: {})
        new(config: config).annotate(tree)
      end

      private

      # Turns a heuristic class into an instance; anything that is not a
      # Class is returned untouched.
      #
      # A configured value of nil is treated like a missing key so the
      # heuristic keeps its own default instead of receiving `threshold: nil`,
      # which would make its numeric comparison fail later.
      def instantiate(heuristic, config)
        return heuristic unless heuristic.is_a?(Class)

        threshold = heuristic.respond_to?(:config_key) ? config[heuristic.config_key] : nil
        threshold.nil? ? heuristic.new : heuristic.new(threshold: threshold)
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Heuristic that reports nodes whose actual row count differs from the
    # planner's estimate by at least a configurable factor, in either
    # direction.
    class RowEstimateDrift < Base
      DEFAULT_FACTOR = 10

      # Configuration key under which a custom factor is looked up.
      #
      # @return [Symbol]
      def self.config_key
        :drift_factor
      end

      # @param threshold [Numeric] minimum over/under-estimation ratio that
      #   triggers a warning
      def initialize(threshold: DEFAULT_FACTOR)
        super()
        @factor = threshold
      end

      # Inspects a single plan node.
      #
      # @param node [#actual_rows, #plan_rows, #actual_loops, #relation_name]
      # @return [Array] one warning when the drift ratio reaches the factor,
      #   otherwise an empty array
      def apply(node)
        actual = node.actual_rows
        estimated = node.plan_rows
        return [] unless actual && estimated
        return [] if actual.zero? && estimated.zero?

        # A node that never executed reports zero loops and zero actual rows;
        # comparing that against the estimate would flag drift that was never
        # measured.
        return [] if node.actual_loops&.zero?

        # Both sides are clamped to at least 1 to avoid dividing by zero.
        plan = [estimated, 1].max
        ratio = [actual.to_f / plan, plan.to_f / [actual, 1].max].max

        return [] if ratio < @factor

        relation = node.relation_name
        suggestion = relation ? "Run `ANALYZE #{relation}` to refresh planner statistics." : nil

        # `warning` is not defined in this class (presumably provided by Base).
        [
          warning(
            severity: :warning,
            code: :row_estimate_drift,
            message: "Plan estimated #{plan} rows, actual was #{actual} (#{ratio.round(1)}x drift).",
            suggestion: suggestion
          )
        ]
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Heuristic that reports Seq Scan nodes whose estimated row count is
    # above a configurable threshold.
    class SeqScanOnLargeTable < Base
      DEFAULT_THRESHOLD = 10_000

      # Configuration key under which a custom threshold is looked up.
      #
      # @return [Symbol]
      def self.config_key
        :seq_scan_threshold
      end

      # @param threshold [Numeric] estimated row count that must be exceeded
      def initialize(threshold: DEFAULT_THRESHOLD)
        super()
        @threshold = threshold
      end

      # Inspects a single plan node.
      #
      # @param node [#node_type, #plan_rows, #relation_name]
      # @return [Array] one critical warning for a large sequential scan,
      #   otherwise an empty array
      def apply(node)
        return [] if node.node_type != "Seq Scan"

        estimate = node.plan_rows
        return [] unless estimate && estimate > @threshold

        table = node.relation_name || "this table"

        # `warning` is not defined in this class (presumably provided by Base).
        [
          warning(
            severity: :critical,
            code: :seq_scan_on_large_table,
            message: "Seq Scan on a table with ~#{estimate.to_i} estimated rows",
            suggestion: "Consider adding an index that matches the filter on `#{table}`."
          )
        ]
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Heuristics
    # Severity levels a Warning may carry.
    WARNING_SEVERITIES = %i[info warning critical].freeze

    # Value object describing one finding. Built with keyword arguments;
    # construction fails unless `severity` is one of WARNING_SEVERITIES.
    Warning = Struct.new(:severity, :code, :message, :suggestion, keyword_init: true) do
      # @raise [ArgumentError] when the severity is missing or unknown
      def initialize(*)
        super
        unless WARNING_SEVERITIES.include?(severity)
          raise ArgumentError, "severity must be one of #{WARNING_SEVERITIES.inspect}"
        end
      end
    end
  end
end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Plan
    # One node of an EXPLAIN plan, built from the raw hash for that node.
    # Recognised keys are exposed through readers; every other key except
    # "Plans" is kept in `attrs`.
    class Node
      # Keys of the raw hash that have a dedicated reader.
      RECOGNIZED_KEYS = [
        "Node Type", "Relation Name", "Alias", "Index Name",
        "Startup Cost", "Total Cost", "Plan Rows", "Plan Width",
        "Actual Startup Time", "Actual Total Time",
        "Actual Rows", "Actual Loops",
        "Filter", "Index Cond", "Join Filter", "Hash Cond", "Recheck Cond",
        "Sort Method", "Sort Key", "Sort Space Type", "Sort Space Used",
        "Join Type", "Strategy", "Parent Relationship", "Subplan Name"
      ].freeze

      attr_reader :node_type, :relation_name, :alias_name, :index_name,
                  :startup_cost, :total_cost,
                  :plan_rows, :plan_width,
                  :actual_startup_time, :actual_total_time,
                  :actual_rows, :actual_loops,
                  :filter, :index_cond, :join_filter, :hash_cond, :recheck_cond,
                  :sort_method, :sort_key, :sort_space_type, :sort_space_used,
                  :join_type, :strategy, :parent_relationship, :subplan_name,
                  :children, :attrs

      # Warnings attached to this node; starts out empty.
      attr_accessor :warnings

      # @param raw [Hash] plan node keyed by the EXPLAIN field names
      # @param children [Array] already-built child nodes
      def initialize(raw, children: [])
        @raw = raw
        @children = children
        @warnings = []

        # Identity
        @node_type = raw["Node Type"]
        @relation_name = raw["Relation Name"]
        @alias_name = raw["Alias"]
        @index_name = raw["Index Name"]

        # Planner estimates
        @startup_cost = raw["Startup Cost"]
        @total_cost = raw["Total Cost"]
        @plan_rows = raw["Plan Rows"]
        @plan_width = raw["Plan Width"]

        # Measured values
        @actual_startup_time = raw["Actual Startup Time"]
        @actual_total_time = raw["Actual Total Time"]
        @actual_rows = raw["Actual Rows"]
        @actual_loops = raw["Actual Loops"]

        # Conditions
        @filter = raw["Filter"]
        @index_cond = raw["Index Cond"]
        @join_filter = raw["Join Filter"]
        @hash_cond = raw["Hash Cond"]
        @recheck_cond = raw["Recheck Cond"]

        # Sort details
        @sort_method = raw["Sort Method"]
        @sort_key = raw["Sort Key"]
        @sort_space_type = raw["Sort Space Type"]
        @sort_space_used = raw["Sort Space Used"]

        # Join / structure
        @join_type = raw["Join Type"]
        @strategy = raw["Strategy"]
        @parent_relationship = raw["Parent Relationship"]
        @subplan_name = raw["Subplan Name"]

        # Everything without a dedicated reader, minus the nested plans.
        @attrs = raw.reject { |key, _value| key == "Plans" || RECOGNIZED_KEYS.include?(key) }
      end

      # @return [Boolean] true when an actual total time was recorded
      def analyzed?
        !actual_total_time.nil?
      end

      # @return [Boolean] true when the node has no children
      def leaf?
        children.empty?
      end

      # @return [String, nil] the alias when both alias and relation name are
      #   present and differ, otherwise the relation name
      def target
        aliased = alias_name && relation_name && alias_name != relation_name
        aliased ? alias_name : relation_name
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Plan
    # Converts EXPLAIN output (a JSON string or already-decoded data) into a
    # Tree of Node objects.
    class Parser
      class << self
        # @param json [String, Array, Hash] JSON text, or decoded data that is
        #   either the envelope hash or an array whose first element is it
        # @return [Tree]
        # @raise [Benedictus::DatabaseError] when the array is empty or there
        #   is no top-level "Plan" hash
        def parse(json)
          data = json.is_a?(String) ? JSON.parse(json) : json
          listed = data.is_a?(Array)

          raise Benedictus::DatabaseError, "EXPLAIN returned an empty result" if listed && data.empty?

          envelope = listed ? data.first : data
          well_formed = envelope.is_a?(Hash) && envelope["Plan"].is_a?(Hash)

          unless well_formed
            raise Benedictus::DatabaseError,
                  "EXPLAIN output did not contain a top-level 'Plan' object"
          end

          Tree.new(
            build_node(envelope["Plan"]),
            planning_time: envelope["Planning Time"],
            execution_time: envelope["Execution Time"],
            triggers: envelope["Triggers"] || [],
            raw: data
          )
        end

        private

        # Recursively builds a Node, descending into "Plans" when it is an
        # array.
        def build_node(raw)
          nested = raw["Plans"]
          kids = nested.is_a?(Array) ? nested.map { |plan| build_node(plan) } : []
          Node.new(raw, children: kids)
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Plan
    # A whole plan: the root node plus the envelope-level values handed to
    # the constructor.
    class Tree
      attr_reader :root, :planning_time, :execution_time, :triggers, :raw

      # @param root [Object] root plan node
      # @param planning_time [Numeric, nil]
      # @param execution_time [Numeric, nil]
      # @param triggers [Array]
      # @param raw [Object, nil] the data the tree was built from
      def initialize(root, planning_time: nil, execution_time: nil, triggers: [], raw: nil)
        @root = root
        @raw = raw
        @triggers = triggers
        @planning_time = planning_time
        @execution_time = execution_time
      end

      # Visits every node, parent before children, children in order.
      # Without a block an Enumerator is returned instead.
      def each_node(&block)
        block ? traverse(root, &block) : enum_for(:each_node)
      end

      # @return [Numeric, nil] total cost of the root node
      def total_cost
        root.total_cost
      end

      # @return [Numeric, nil] the execution time when present, otherwise the
      #   root node's actual total time
      def total_time
        execution_time || root.actual_total_time
      end

      # @return [Boolean] whether the root node reports itself as analyzed
      def analyzed?
        root.analyzed?
      end

      # @return [Numeric, nil] the root's actual rows, falling back to its
      #   planned rows
      def total_rows
        root.actual_rows || root.plan_rows
      end

      # @return [Array] the warnings of all nodes, in traversal order
      def warnings
        each_node.flat_map { |node| node.warnings }
      end

      private

      # Pre-order walk: yield the node, then recurse into each child.
      def traverse(node, &block)
        yield node
        node.children.each { |child| traverse(child, &block) }
      end
    end
  end
end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  # Builds an `EXPLAIN (...)` statement for a relation's SQL and executes it
  # through the relation's connection.
  class PlanRunner
    # Shorthand for `new(**kwargs).call`.
    def self.call(**kwargs)
      new(**kwargs).call
    end

    # @param relation [#to_sql, #klass] query source; `klass.connection` must
    #   respond to `exec_query`
    # @param analyze [Boolean] add the ANALYZE option
    # @param buffers [Boolean] add the BUFFERS option (only with analyze)
    # @param verbose [Boolean] add the VERBOSE option
    # @param format [String, Symbol] EXPLAIN output format; matched
    #   case-insensitively
    def initialize(relation:, analyze: false, buffers: false, verbose: false, format: "JSON")
      @relation = relation
      @analyze = analyze
      @buffers = buffers
      @verbose = verbose
      # Normalised so that "json" / :json take the JSON-parsing path in
      # #execute just like "JSON" does.
      @format = format.to_s.upcase
    end

    # Runs the EXPLAIN statement.
    #
    # @return [Object] parsed JSON when the format is JSON, otherwise the
    #   connection's raw result
    # @raise [Benedictus::UnsafeQueryError] when analyze is requested for a
    #   raw SQL adapter
    # @raise [Benedictus::DatabaseError] wrapping any other StandardError
    def call
      reject_raw_sql_adapter_under_analyze!

      sql = @relation.to_sql
      Benedictus::SafetyGuard.assert_select!(sql) if @analyze

      explain_sql = "EXPLAIN (#{options_string}) #{sql}"

      if @analyze
        Benedictus::SafetyGuard.with_rollback(@relation.klass) { execute(explain_sql) }
      else
        execute(explain_sql)
      end
    rescue StandardError => e
      # The gem's own errors pass through unchanged; everything else is
      # wrapped.
      raise if e.is_a?(Benedictus::Error)

      raise Benedictus::DatabaseError, "EXPLAIN failed: #{e.class}: #{e.message}"
    end

    private

    # Refuses ANALYZE for relations that are RawSqlAdapter instances.
    def reject_raw_sql_adapter_under_analyze!
      return unless @analyze
      return unless defined?(Benedictus::RelationResolver::RawSqlAdapter)
      return unless @relation.is_a?(Benedictus::RelationResolver::RawSqlAdapter)

      raise Benedictus::UnsafeQueryError,
            "--analyze cannot be used with raw SQL strings; pass an ActiveRecord::Relation."
    end

    # Comma-separated option list for the EXPLAIN statement.
    def options_string
      opts = []
      opts << "ANALYZE" if @analyze
      opts << "BUFFERS" if @buffers && @analyze
      opts << "VERBOSE" if @verbose
      opts << "FORMAT #{@format}"
      opts.join(", ")
    end

    # Executes the statement; JSON output is parsed, other formats are
    # returned as the connection delivered them.
    def execute(sql)
      result = @relation.klass.connection.exec_query(sql)
      return result if @format != "JSON"

      payload = extract_json_payload(result)
      JSON.parse(payload)
    end

    # Picks the first cell of the first row (or the first element when rows
    # are not arrays).
    def extract_json_payload(result)
      rows = result.respond_to?(:rows) ? result.rows : result
      first = Array(rows).first
      first = first.first if first.is_a?(Array)
      first
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  # Finds the nearest `config/environment.rb` at or above a starting
  # directory and requires it.
  class RailsLoader
    # Environment variable consulted when RAILS_ENV is not already set.
    ENV_VAR = "BENEDICTUS_RAILS_ENV"

    # Shorthand for `new(start_dir: ...).load!`.
    def self.load!(start_dir: Dir.pwd)
      new(start_dir: start_dir).load!
    end

    # @param start_dir [String] directory where the upward search begins
    def initialize(start_dir: Dir.pwd)
      @start_dir = File.expand_path(start_dir)
    end

    # Ensures RAILS_ENV is set (existing value wins, then ENV_VAR, then
    # "development"), then requires the located environment file.
    #
    # @return [String] path of the required file
    # @raise [Benedictus::RailsNotFoundError] when no file is found
    def load!
      ENV["RAILS_ENV"] = ENV.fetch(ENV_VAR, "development") unless ENV["RAILS_ENV"]

      locate_environment_rb.tap { |path| require path }
    end

    private

    # Walks from the start directory towards the filesystem root, stopping
    # once the parent of a directory is the directory itself.
    def locate_environment_rb
      current = @start_dir
      previous = nil

      until current == previous
        env_file = File.join(current, "config", "environment.rb")
        return env_file if File.file?(env_file)

        previous = current
        current = File.dirname(current)
      end

      raise Benedictus::RailsNotFoundError,
            "Could not find config/environment.rb above #{@start_dir}. Run benedictus from inside a Rails application."
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  # Reduces an arbitrary value to something that can be explained: an
  # ActiveRecord::Relation, or a RawSqlAdapter wrapping a SQL string.
  class RelationResolver
    # Maximum number of nested `call` / `to_relation` unwrapping steps.
    MAX_DEPTH = 5

    # Minimal stand-in for a relation, built from the result of `to_sql`.
    class RawSqlAdapter
      attr_reader :sql

      # @param sql [String]
      def initialize(sql)
        @sql = sql
      end

      # @return [String] the wrapped SQL
      def to_sql
        sql
      end

      # @return [Class, nil] ActiveRecord::Base when it is defined
      def klass
        ActiveRecord::Base if defined?(ActiveRecord::Base)
      end
    end

    # Shorthand for `new.call(value)`.
    def self.call(value)
      new.call(value)
    end

    # Resolution order: relation as-is, then `call`, then `to_relation`
    # (both re-resolved one level deeper), then `to_sql` wrapped in a
    # RawSqlAdapter.
    #
    # @param value [Object]
    # @param depth [Integer] current unwrapping depth
    # @raise [UnresolvableExpression] when the depth limit is exceeded or the
    #   value matches none of the cases
    def call(value, depth: 0)
      raise UnresolvableExpression, "exceeded resolution depth (#{MAX_DEPTH})" if depth > MAX_DEPTH

      if active_record_relation?(value)
        value
      elsif value.respond_to?(:call)
        call(value.call, depth: depth + 1)
      elsif value.respond_to?(:to_relation)
        call(value.to_relation, depth: depth + 1)
      elsif value.respond_to?(:to_sql)
        RawSqlAdapter.new(value.to_sql)
      else
        raise UnresolvableExpression,
              "Expression returned a #{value.class}; expected an ActiveRecord::Relation " \
              "or something responding to .to_sql."
      end
    end

    private

    # Falsy when ActiveRecord::Relation is not loaded.
    def active_record_relation?(value)
      defined?(ActiveRecord::Relation) && value.is_a?(ActiveRecord::Relation)
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Renderers
    # Renders plan data as pretty-printed JSON text.
    class JsonRenderer
      # @param plan_data [Object] any value JSON.pretty_generate accepts
      def initialize(plan_data)
        @plan_data = plan_data
      end

      # @return [String] pretty-printed JSON
      def render
        JSON.pretty_generate(plan_data)
      end

      private

      attr_reader :plan_data
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module Benedictus
  module Renderers
    # Renders a query result as plain text, one value per line.
    class RawRenderer
      # @param result [#rows, Array, Object] a result responding to `rows`,
      #   or the rows themselves
      def initialize(result)
        @result = result
      end

      # Uses `result.rows` when available, otherwise the result itself;
      # nested rows are flattened before joining.
      #
      # @return [String] all values joined with newlines
      def render
        lines = @result
        lines = lines.rows if lines.respond_to?(:rows)
        Array(lines).flatten.join("\n")
      end
    end
  end
end