sql_genius 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +195 -0
  3. data/LICENSE.txt +65 -0
  4. data/README.md +178 -0
  5. data/Rakefile +8 -0
  6. data/app/controllers/concerns/sql_genius/ai_features.rb +332 -0
  7. data/app/controllers/concerns/sql_genius/database_analysis.rb +67 -0
  8. data/app/controllers/concerns/sql_genius/query_execution.rb +87 -0
  9. data/app/controllers/concerns/sql_genius/shared_view_helpers.rb +76 -0
  10. data/app/controllers/sql_genius/base_controller.rb +29 -0
  11. data/app/controllers/sql_genius/queries_controller.rb +94 -0
  12. data/app/views/layouts/sql_genius/application.html.erb +285 -0
  13. data/config/routes.rb +34 -0
  14. data/docs/guides/ai-features.md +115 -0
  15. data/docs/guides/getting-started-rails.md +118 -0
  16. data/docs/guides/ssh-tunnel-connections.md +151 -0
  17. data/docs/screenshots/ai_tools.png +0 -0
  18. data/docs/screenshots/dashboard.png +0 -0
  19. data/docs/screenshots/duplicate_indexes.png +0 -0
  20. data/docs/screenshots/query_explore.png +0 -0
  21. data/docs/screenshots/query_stats.png +0 -0
  22. data/docs/screenshots/server.png +0 -0
  23. data/docs/screenshots/table_sizes.png +0 -0
  24. data/lib/generators/sql_genius/install/install_generator.rb +19 -0
  25. data/lib/generators/sql_genius/install/templates/initializer.rb +56 -0
  26. data/lib/sql_genius/configuration.rb +114 -0
  27. data/lib/sql_genius/core/ai/client.rb +155 -0
  28. data/lib/sql_genius/core/ai/config.rb +47 -0
  29. data/lib/sql_genius/core/ai/connection_advisor.rb +96 -0
  30. data/lib/sql_genius/core/ai/describe_query.rb +41 -0
  31. data/lib/sql_genius/core/ai/dialect_hints.rb +35 -0
  32. data/lib/sql_genius/core/ai/index_advisor.rb +43 -0
  33. data/lib/sql_genius/core/ai/index_planner.rb +91 -0
  34. data/lib/sql_genius/core/ai/innodb_interpreter.rb +78 -0
  35. data/lib/sql_genius/core/ai/migration_risk.rb +51 -0
  36. data/lib/sql_genius/core/ai/optimization.rb +81 -0
  37. data/lib/sql_genius/core/ai/pattern_grouper.rb +94 -0
  38. data/lib/sql_genius/core/ai/rewrite_query.rb +51 -0
  39. data/lib/sql_genius/core/ai/schema_context_builder.rb +82 -0
  40. data/lib/sql_genius/core/ai/schema_review.rb +46 -0
  41. data/lib/sql_genius/core/ai/suggestion.rb +74 -0
  42. data/lib/sql_genius/core/ai/variable_reviewer.rb +113 -0
  43. data/lib/sql_genius/core/ai/workload_digest.rb +86 -0
  44. data/lib/sql_genius/core/analysis/columns.rb +63 -0
  45. data/lib/sql_genius/core/analysis/duplicate_indexes.rb +85 -0
  46. data/lib/sql_genius/core/analysis/query_history.rb +50 -0
  47. data/lib/sql_genius/core/analysis/query_stats.rb +76 -0
  48. data/lib/sql_genius/core/analysis/server_overview.rb +294 -0
  49. data/lib/sql_genius/core/analysis/stats_collector.rb +118 -0
  50. data/lib/sql_genius/core/analysis/stats_history.rb +42 -0
  51. data/lib/sql_genius/core/analysis/table_sizes.rb +52 -0
  52. data/lib/sql_genius/core/analysis/unused_indexes.rb +62 -0
  53. data/lib/sql_genius/core/column_definition.rb +30 -0
  54. data/lib/sql_genius/core/connection/active_record_adapter.rb +75 -0
  55. data/lib/sql_genius/core/connection/fake_adapter.rb +114 -0
  56. data/lib/sql_genius/core/connection.rb +37 -0
  57. data/lib/sql_genius/core/execution_result.rb +27 -0
  58. data/lib/sql_genius/core/index_definition.rb +23 -0
  59. data/lib/sql_genius/core/query_builders/mysql.rb +169 -0
  60. data/lib/sql_genius/core/query_builders/postgresql.rb +185 -0
  61. data/lib/sql_genius/core/query_builders.rb +27 -0
  62. data/lib/sql_genius/core/query_explainer.rb +113 -0
  63. data/lib/sql_genius/core/query_runner/config.rb +21 -0
  64. data/lib/sql_genius/core/query_runner.rb +123 -0
  65. data/lib/sql_genius/core/result.rb +43 -0
  66. data/lib/sql_genius/core/server_info.rb +54 -0
  67. data/lib/sql_genius/core/sql_validator.rb +149 -0
  68. data/lib/sql_genius/core/views/sql_genius/queries/_shared_results.html.erb +59 -0
  69. data/lib/sql_genius/core/views/sql_genius/queries/_tab_ai_tools.html.erb +43 -0
  70. data/lib/sql_genius/core/views/sql_genius/queries/_tab_dashboard.html.erb +97 -0
  71. data/lib/sql_genius/core/views/sql_genius/queries/_tab_duplicate_indexes.html.erb +35 -0
  72. data/lib/sql_genius/core/views/sql_genius/queries/_tab_query_explorer.html.erb +110 -0
  73. data/lib/sql_genius/core/views/sql_genius/queries/_tab_query_stats.html.erb +43 -0
  74. data/lib/sql_genius/core/views/sql_genius/queries/_tab_server.html.erb +59 -0
  75. data/lib/sql_genius/core/views/sql_genius/queries/_tab_slow_queries.html.erb +17 -0
  76. data/lib/sql_genius/core/views/sql_genius/queries/_tab_table_sizes.html.erb +33 -0
  77. data/lib/sql_genius/core/views/sql_genius/queries/_tab_unused_indexes.html.erb +54 -0
  78. data/lib/sql_genius/core/views/sql_genius/queries/dashboard.html.erb +1826 -0
  79. data/lib/sql_genius/core/views/sql_genius/queries/query_detail.html.erb +465 -0
  80. data/lib/sql_genius/core.rb +72 -0
  81. data/lib/sql_genius/engine.rb +31 -0
  82. data/lib/sql_genius/slow_query_monitor.rb +43 -0
  83. data/lib/sql_genius/version.rb +5 -0
  84. data/lib/sql_genius.rb +29 -0
  85. data/sql_genius.gemspec +47 -0
  86. metadata +171 -0
@@ -0,0 +1,41 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Builds a "describe this query" prompt and sends it to the AI client.
      #
      # Construct with:
      #   client     - responds to #chat(messages:), e.g. a Core::Ai::Client
      #   config     - responds to #domain_context
      #   connection - optional. When given, the prompt names the dialect
      #                reported by DialectHints.name_for(connection). When
      #                omitted, the prompt keeps its historical "MySQL"
      #                wording and no connection is consulted.
      #
      # Call:
      #   .call(sql) -> whatever client.chat returns
      #
      # Extracted from app/controllers/concerns/sql_genius/ai_features.rb
      # in Phase 2a.
      class DescribeQuery
        # Dialect named in the prompt when no connection is supplied.
        DEFAULT_DIALECT_NAME = "MySQL"

        def initialize(client, config, connection = nil)
          @client = client
          @config = config
          @connection = connection
        end

        # Sends the system prompt plus the raw SQL (as the user message).
        def call(sql)
          messages = [
            { role: "system", content: system_prompt },
            { role: "user", content: sql },
          ]
          @client.chat(messages: messages)
        end

        private

        # Dialect display name for the prompt's opening sentence.
        def dialect_name
          return DEFAULT_DIALECT_NAME unless @connection

          DialectHints.name_for(@connection)
        end

        def system_prompt
          <<~PROMPT
            You are a #{dialect_name} query explainer. Given a SQL query, explain in plain English:
            1. What the query does (tables involved, joins, filters, aggregations)
            2. How data flows through the query
            3. Any subtle behaviors (implicit type casts, NULL handling in NOT IN, DISTINCT effects, etc.)
            4. Potential performance concerns visible from the SQL structure alone
            #{@config.domain_context}
            Respond with JSON: {"explanation": "your plain-English explanation using markdown formatting"}
          PROMPT
        end
      end
    end
  end
end
@@ -0,0 +1,35 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Dialect-specific fragments for AI system prompts, so the model writes
      # SQL for the database actually in use. Prompts that hardcode "MySQL"
      # and backtick quoting yield broken SQL on PostgreSQL, which quotes
      # identifiers with double quotes and has no backtick syntax.
      module DialectHints
        extend self

        # Human-readable dialect name, for sentences such as
        # "You are a SQL assistant for a #{name_for} database."
        # Anything other than :postgresql is reported as MySQL/MariaDB.
        def name_for(connection)
          dialect = connection.server_version.dialect
          dialect == :postgresql ? "PostgreSQL" : "MySQL/MariaDB"
        end

        # One-sentence identifier-quoting rule meant to be dropped into a
        # numbered Rules list: double quotes on PostgreSQL, backticks on
        # MySQL/MariaDB.
        def identifier_quoting_rule(connection)
          return "Use backticks (`col_name`) for table and column names." unless connection.server_version.postgresql?

          'Use double quotes ("col_name") for case-sensitive identifiers; otherwise leave them bare.'
        end
      end
    end
  end
end
@@ -0,0 +1,43 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Asks the AI client for index recommendations for a single query,
      # given its EXPLAIN output and the schema (with index cardinality) of
      # the tables the query references.
      #
      # Construct with:
      #   client     - responds to #chat(messages:)
      #   config     - responds to #domain_context
      #   connection - passed to SqlValidator, SchemaContextBuilder and
      #                DialectHints
      #
      # Call:
      #   .call(sql, explain_rows)
      #     explain_rows - Array of row arrays OR a pre-formatted String
      #     -> whatever client.chat returns
      class IndexAdvisor
        def initialize(client, config, connection)
          @client = client
          @config = config
          @connection = connection
        end

        def call(sql, explain_rows)
          tables = SqlValidator.extract_table_references(sql, @connection)
          schema = SchemaContextBuilder.new(@connection).call(tables, detail: :with_cardinality)
          explain_text = format_explain(explain_rows)

          messages = [
            { role: "system", content: system_prompt },
            { role: "user", content: "Query:\n#{sql}\n\nEXPLAIN:\n#{explain_text}\n\nSchema:\n#{schema}" },
          ]
          @client.chat(messages: messages)
        end

        private

        # Renders EXPLAIN rows as "a | b | c" lines. A String is assumed to
        # be already formatted and is passed through unchanged, so callers
        # may hand over either form.
        def format_explain(explain_rows)
          return explain_rows if explain_rows.is_a?(String)

          explain_rows.map { |row| row.join(" | ") }.join("\n")
        end

        def system_prompt
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} index advisor. Given a query, its EXPLAIN output, and current index/cardinality information, suggest optimal indexes. Consider:
            - Composite index column ordering (most selective first, or matching query order)
            - Covering indexes to avoid table lookups
            - Partial indexes for long string columns
            - Write-side costs (if this is a high-write table, note the INSERT/UPDATE overhead)
            - Whether existing indexes could be extended rather than creating new ones
            #{@config.domain_context}

            Respond with JSON: {"indexes": "markdown-formatted recommendations with exact CREATE INDEX statements, rationale for column ordering, and estimated impact. Include any indexes that should be DROPPED as part of the change."}
          PROMPT
        end
      end
    end
  end
end
@@ -0,0 +1,91 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Asks the AI client for a consolidated index plan (drop / merge /
      # keep / add) based on unused indexes, duplicate indexes, the schema
      # with cardinality, and the current index list of each target table.
      class IndexPlanner
        def initialize(client, config, connection)
          @client = client
          @config = config
          @connection = connection
        end

        # tables - optional table name or list of names. Blank entries are
        #          ignored; when nothing usable is given, up to ten tables
        #          are picked automatically.
        # Returns a canned { "plan" => ... } hash when there is nothing to
        # analyze, otherwise whatever client.chat returns.
        def call(tables = nil)
          target_tables = resolve_tables(tables)
          return { "plan" => "No tables found to analyze." } if target_tables.empty?

          unused = Analysis::UnusedIndexes.new(@connection).call.indexes
          duplicates = Analysis::DuplicateIndexes.new(@connection, blocked_tables: []).call
          schema = SchemaContextBuilder.new(@connection).call(target_tables, detail: :with_cardinality)
          index_map = target_tables.to_h { |table| [table, index_listing(table)] }

          system_message = { role: "system", content: system_prompt }
          user_message = { role: "user", content: user_prompt(schema, unused, duplicates, index_map) }
          @client.chat(messages: [system_message, user_message])
        end

        private

        # Explicit table names win; otherwise fall back to the biggest tables.
        def resolve_tables(tables)
          explicit = Array(tables).reject { |name| name.to_s.empty? }
          explicit.empty? ? top_tables_by_size : explicit
        end

        # One "[UNIQUE ]INDEX name (cols)" line per index on the table.
        def index_listing(table)
          @connection.indexes_for(table).map do |idx|
            "#{"UNIQUE " if idx.unique}INDEX #{idx.name} (#{idx.columns.join(", ")})"
          end
        end

        # Ten largest base tables according to information_schema. If that
        # lookup raises for any reason, the first ten names from
        # connection.tables are used instead.
        def top_tables_by_size
          quoted_db = @connection.quote(@connection.current_database)
          sql = [
            "SELECT table_name FROM information_schema.tables",
            "WHERE table_schema = #{quoted_db} AND table_type = 'BASE TABLE'",
            "ORDER BY (data_length + index_length) DESC LIMIT 10",
          ].join(" ")
          @connection.exec_query(sql).rows.map(&:first)
        rescue StandardError
          @connection.tables.first(10)
        end

        # Assembles the user message from its sections; the unused and
        # duplicate sections are left out when they have no entries.
        def user_prompt(schema, unused, duplicates, index_map)
          sections = [
            "Schema with cardinality:\n#{schema}",
            unused_section(unused),
            duplicate_section(duplicates),
            current_index_section(index_map),
          ]
          sections.compact.join("\n\n")
        end

        def unused_section(unused)
          return unless unused.any?

          lines = unused.map do |entry|
            "#{entry[:table]}.#{entry[:index_name]} (reads=#{entry[:reads]}, writes=#{entry[:writes]})"
          end
          "Unused indexes (zero reads):\n#{lines.join("\n")}"
        end

        def duplicate_section(duplicates)
          return unless duplicates.any?

          lines = duplicates.map do |dup|
            redundant = "#{dup[:duplicate_index]} (#{dup[:duplicate_columns].join(", ")})"
            covering = "#{dup[:covered_by_index]} (#{dup[:covered_by_columns].join(", ")})"
            "#{dup[:table]}: #{redundant} covered by #{covering}"
          end
          "Duplicate indexes:\n#{lines.join("\n")}"
        end

        def current_index_section(index_map)
          lines = index_map.map do |table, listing|
            "#{table}: #{listing.any? ? listing.join("; ") : "NONE"}"
          end
          "Current indexes per table:\n#{lines.join("\n")}"
        end

        def system_prompt
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} index consolidation planner. Given schema information, unused indexes, duplicate indexes, and current index listings, produce a consolidated optimization plan. For each recommendation:
            - DROP redundant or unused indexes using #{DialectHints.name_for(@connection)} syntax (e.g. `ALTER TABLE t DROP INDEX i;` on MySQL/MariaDB, `DROP INDEX "i";` on PostgreSQL)
            - MERGE overlapping indexes into composites where beneficial
            - KEEP indexes that are actively used and well-structured
            - ADD new composite indexes where query patterns suggest benefit
            - Provide rationale for each change and estimated impact on read/write performance
            #{@config.domain_context}
            Respond with JSON: {"plan": "markdown with dialect-appropriate DROP INDEX / CREATE INDEX statements, rationale for each change, and estimated impact"}
          PROMPT
        end
      end
    end
  end
end
@@ -0,0 +1,78 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Explains SHOW ENGINE INNODB STATUS output in plain English. The raw
      # status text is sent to the LLM together with the :innodb metrics
      # from Analysis::ServerOverview so it has context for the analysis.
      class InnodbInterpreter
        # The status text is cut to this many characters before sending.
        MAX_STATUS_LENGTH = 4000

        def initialize(client, config, connection)
          @client = client
          @config = config
          @connection = connection
        end

        # Raises Core::UnsupportedDialect on PostgreSQL connections;
        # otherwise returns whatever client.chat returns.
        def call
          raise Core::UnsupportedDialect.for_postgresql("InnoDB Health Interpreter") if @connection.server_version.postgresql?

          status_text = fetch_innodb_status
          metrics = fetch_innodb_metrics

          system_message = { role: "system", content: system_prompt }
          user_message = { role: "user", content: user_prompt(status_text, metrics) }
          @client.chat(messages: [system_message, user_message])
        end

        private

        # Third column of the first result row, as a String (empty when the
        # query returned no rows), limited to MAX_STATUS_LENGTH characters.
        def fetch_innodb_status
          first_row = @connection.exec_query("SHOW ENGINE INNODB STATUS").rows.first
          text = first_row&.dig(2).to_s
          return text if text.length <= MAX_STATUS_LENGTH

          text[0, MAX_STATUS_LENGTH]
        end

        def fetch_innodb_metrics
          Analysis::ServerOverview.new(@connection).call[:innodb]
        end

        def system_prompt
          <<~PROMPT
            You are a MySQL InnoDB internals expert. Analyze the SHOW ENGINE INNODB STATUS output and the supplementary metrics below. Provide a plain-English interpretation organized by these sections:
            - **Deadlocks**: recent deadlock information, lock wait chains, affected transactions
            - **Transaction History**: history list length, purge lag, long-running transactions
            - **Buffer Pool**: hit rate, dirty page ratio, free pages, eviction pressure
            - **I/O**: pending reads/writes, log sequence numbers, checkpoint age, log sizing adequacy
            - **Semaphores**: mutex/rw-lock waits, spin rounds, OS waits indicating contention

            For each section, explain what the numbers mean in practical terms and recommend specific actions if problems are detected.
            #{@config.domain_context}
            Respond with JSON: {"findings": "markdown analysis organized by: Deadlocks, Transaction History, Buffer Pool, I/O, Semaphores. Each section should include current state assessment, risk level, and actionable recommendations."}
          PROMPT
        end

        # Status text first, then a blank line, then a labelled summary of
        # the supplementary metrics.
        def user_prompt(status_text, metrics)
          [
            "== SHOW ENGINE INNODB STATUS ==",
            status_text,
            "",
            "== InnoDB Metrics Summary ==",
            "Buffer Pool Size: #{metrics[:buffer_pool_mb]} MB",
            "Buffer Pool Hit Rate: #{metrics[:buffer_pool_hit_rate]}%",
            "Dirty Pages: #{metrics[:buffer_pool_pages_dirty]}",
            "Free Pages: #{metrics[:buffer_pool_pages_free]}",
            "Total Pages: #{metrics[:buffer_pool_pages_total]}",
            "Row Lock Waits: #{metrics[:row_lock_waits]}",
            "Row Lock Time (ms): #{metrics[:row_lock_time_ms]}",
          ].join("\n")
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Asks the AI client to assess the operational risk of a migration
      # (Rails migration source or raw DDL), giving it the schema of the
      # tables the migration appears to touch.
      #
      # Construct with:
      #   client     - responds to #chat(messages:)
      #   config     - responds to #domain_context
      #   connection - passed to SchemaContextBuilder and DialectHints
      #
      # Call:
      #   .call(migration_sql) -> whatever client.chat returns
      class MigrationRisk
        # Rails migration helpers whose first argument is the table name.
        TABLE_HELPERS = %w[
          create_table drop_table change_table rename_table
          add_column remove_column rename_column change_column
          add_index remove_index
        ].freeze

        # Captures the table name that follows one of TABLE_HELPERS or a raw
        # ALTER TABLE. The name may be written as a symbol, bare word, or in
        # single, double or backtick quotes, and the helper may be called
        # with or without parentheses.
        TABLE_REFERENCE = /(?:#{TABLE_HELPERS.join("|")}|alter\s+table)[\s(]+[:"'`]?(\w+)/i

        def initialize(client, config, connection)
          @client = client
          @config = config
          @connection = connection
        end

        def call(migration_sql)
          tables = extract_table_names(migration_sql)
          schema = SchemaContextBuilder.new(@connection).call(tables, detail: :basic)
          # Tell the model explicitly when no table could be resolved.
          schema_text = schema.to_s.empty? ? "Could not determine" : schema

          messages = [
            { role: "system", content: system_prompt },
            { role: "user", content: "Migration:\n#{migration_sql}\n\nAffected Tables:\n#{schema_text}" },
          ]
          @client.chat(messages: messages)
        end

        private

        # Unique table names referenced by the migration, in order of first
        # appearance.
        def extract_table_names(migration_sql)
          migration_sql.scan(TABLE_REFERENCE).flatten.uniq
        end

        def system_prompt
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} migration risk assessor. Given a Rails migration or DDL, evaluate:
            1. Will this lock the table? For how long given the row count?
            2. Is this safe to run during traffic, or does it need a maintenance window?
            3. Should pt-online-schema-change or gh-ost be used instead?
            4. Will it break or degrade any of the active queries against this table?
            5. Are there any data loss risks?
            6. What is the recommended deployment strategy?
            #{@config.domain_context}

            Respond with JSON: {"risk_level": "low|medium|high|critical", "assessment": "markdown-formatted risk assessment with specific recommendations and estimated lock duration"}
          PROMPT
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Analyses a SQL query + its EXPLAIN output and asks the AI client
      # for optimization suggestions.
      #
      # Construct with:
      #   connection - a Core::Connection implementation
      #   client     - a Core::Ai::Client
      #   config     - the Core::Ai::Config
      #
      # Call:
      #   .call(sql, explain_rows, allowed_tables)
      #     explain_rows   - Array of arrays OR a pre-formatted String
      #     allowed_tables - table names to describe in the prompt; names
      #                      the connection does not list are skipped, and
      #                      nil is treated as "no tables"
      #     -> Hash with "suggestions" key
      class Optimization
        def initialize(connection, client, config)
          @connection = connection
          @client = client
          @config = config
        end

        def call(sql, explain_rows, allowed_tables)
          schema = build_schema_description(allowed_tables)
          messages = [
            { role: "system", content: system_prompt(schema) },
            { role: "user", content: user_prompt(sql, explain_rows) },
          ]

          @client.chat(messages: messages)
        end

        private

        def system_prompt(schema_description)
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} query optimization expert. Given a SQL query and its EXPLAIN output, analyze the query execution plan and provide actionable optimization suggestions.

            Available schema:
            #{schema_description}

            Respond with JSON:
            {
            "suggestions": "Markdown-formatted analysis and suggestions. Include: 1) Summary of current execution plan (scan types, rows examined). 2) Specific recommendations such as indexes to add (provide exact CREATE INDEX statements), query rewrites, or structural changes. 3) Expected impact of each suggestion."
            }
          PROMPT
        end

        def user_prompt(sql, explain_rows)
          <<~PROMPT
            SQL Query:
            #{sql}

            EXPLAIN Output:
            #{format_explain(explain_rows)}
          PROMPT
        end

        # A String is taken as already formatted; rows are rendered as
        # "a | b | c" lines.
        def format_explain(explain_rows)
          return explain_rows if explain_rows.is_a?(String)

          explain_rows.map { |row| row.join(" | ") }.join("\n")
        end

        # One description per allowed table that the connection actually
        # lists. The table list is fetched once up front rather than once
        # per table, and not at all when nothing was requested.
        def build_schema_description(allowed_tables)
          requested = Array(allowed_tables)
          return "" if requested.empty?

          known_tables = @connection.tables
          requested.filter_map { |table| describe_table(table) if known_tables.include?(table) }.join("\n")
        end

        # "table: col (type), ..." plus an "Indexes:" line when the table
        # has any indexes.
        def describe_table(table)
          columns = @connection.columns_for(table).map { |c| "#{c.name} (#{c.type})" }
          indexes = @connection.indexes_for(table).map do |idx|
            "#{idx.name}: [#{idx.columns.join(", ")}]#{" UNIQUE" if idx.unique}"
          end

          desc = "#{table}: #{columns.join(", ")}"
          desc += "\n Indexes: #{indexes.join("; ")}" if indexes.any?
          desc
        end
      end
    end
  end
end
@@ -0,0 +1,94 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Clusters expensive queries by their common root cause so that one
      # fix can help several queries. Takes the top statements from
      # Analysis::QueryStats, keeps the high-cost ones, gathers schema
      # context for the tables they reference, and asks the LLM to group
      # them by underlying issue.
      #
      # Construct with:
      #   connection - a Core::Connection implementation
      #   client     - a Core::Ai::Client
      #   config     - the Core::Ai::Config
      #
      # Call:
      #   .call() -> Hash with "groups" key containing markdown analysis
      class PatternGrouper
        QUERY_LIMIT = 30
        ROWS_RATIO_THRESHOLD = 10
        AVG_TIME_THRESHOLD = 50

        # Stat fields listed after each query in the user prompt, in order.
        STAT_FIELDS = %i[calls avg_time_ms rows_ratio rows_examined tmp_disk_tables].freeze

        def initialize(connection, client, config)
          @connection = connection
          @client = client
          @config = config
        end

        def call
          candidates = Analysis::QueryStats.new(@connection).call(sort: "total_time", limit: QUERY_LIMIT)
          high_cost = candidates.select { |stat| high_cost?(stat) }
          return { "groups" => "No high-cost queries found to analyze." } if high_cost.empty?

          tables = high_cost.flat_map { |stat| SqlValidator.extract_table_references(stat[:sql], @connection) }.uniq
          schema = tables.any? ? SchemaContextBuilder.new(@connection).call(tables, detail: :basic) : ""

          system_message = { role: "system", content: system_prompt }
          user_message = { role: "user", content: user_prompt(high_cost, schema) }
          @client.chat(messages: [system_message, user_message])
        end

        private

        # A query qualifies when either threshold is exceeded.
        def high_cost?(stat)
          stat[:rows_ratio] > ROWS_RATIO_THRESHOLD || stat[:avg_time_ms] > AVG_TIME_THRESHOLD
        end

        # Role line, optional domain-context section, then the task
        # description, concatenated in that order.
        def system_prompt
          [role_section, domain_section, task_section].compact.join
        end

        def role_section
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} performance analyst specializing in root-cause analysis.
          PROMPT
        end

        # nil when no domain context is configured.
        def domain_section
          context = @config.domain_context
          has_context = context && !context.empty?
          return unless has_context

          <<~PROMPT

            Domain context:
            #{context}
          PROMPT
        end

        def task_section
          <<~PROMPT

            Given a set of high-cost queries and the schema they reference, group them by shared root cause.
            For each group provide:
            1. The shared root cause (e.g., missing index, full table scan, implicit type conversion)
            2. The affected queries (numbered)
            3. A single fix that addresses all queries in the group (with exact SQL: CREATE INDEX, ALTER TABLE, etc.)
            4. Estimated performance impact

            Respond with JSON: {"groups": "markdown with each group showing: the shared root cause, affected queries (numbered), the single fix that addresses all of them (with exact SQL), and estimated impact"}
          PROMPT
        end

        # Numbered query list with its stats, followed by the schema context
        # when there is any.
        def user_prompt(stats, schema)
          entries = stats.each_with_index.map do |stat, position|
            metrics = STAT_FIELDS.map { |field| "#{field}=#{stat[field]}" }.join(", ")
            "#{position + 1}. SQL: #{stat[:sql]}\n #{metrics}"
          end

          heading = "High-cost queries (rows_ratio > #{ROWS_RATIO_THRESHOLD} OR avg_time_ms > #{AVG_TIME_THRESHOLD}):"
          sections = ["#{heading}\n\n#{entries.join("\n")}"]
          sections << "Schema context:\n#{schema}" unless schema.empty?
          sections.join("\n\n")
        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Asks the AI client for an improved version of a SQL query, using
      # the schema of the tables the query references as context.
      #
      # Construct with:
      #   client     - responds to #chat(messages:)
      #   config     - responds to #domain_context
      #   connection - passed to SqlValidator, SchemaContextBuilder and
      #                DialectHints
      class RewriteQuery
        def initialize(client, config, connection)
          @client = client
          @config = config
          @connection = connection
        end

        # The schema goes into the system prompt; the SQL itself is sent
        # unchanged as the user message.
        def call(sql)
          referenced = SqlValidator.extract_table_references(sql, @connection)
          schema = SchemaContextBuilder.new(@connection).call(referenced, detail: :basic)

          system_message = { role: "system", content: system_prompt(schema) }
          user_message = { role: "user", content: sql }
          @client.chat(messages: [system_message, user_message])
        end

        private

        def system_prompt(schema)
          <<~PROMPT
            You are a #{DialectHints.name_for(@connection)} query rewrite expert. Analyze the SQL for anti-patterns and suggest a rewritten version. Look for:
            - SELECT * when specific columns would suffice
            - Correlated subqueries that could be JOINs
            - OR conditions preventing index use (suggest UNION ALL)
            - LIKE '%prefix' patterns (leading wildcard)
            - Implicit type conversions in WHERE clauses
            - NOT IN with NULLable columns (suggest NOT EXISTS)
            - ORDER BY on non-indexed columns with LIMIT
            - Unnecessary DISTINCT
            - Functions on indexed columns in WHERE (e.g., DATE(created_at) instead of range)

            Available schema:
            #{schema}
            #{@config.domain_context}

            Respond with JSON: {"original": "the original SQL", "rewritten": "the improved SQL", "changes": "markdown list of each change and why it helps"}
          PROMPT
        end
      end
    end
  end
end
@@ -0,0 +1,82 @@
# frozen_string_literal: true

module SqlGenius
  module Core
    module Ai
      # Builds formatted schema-description strings for AI prompt context.
      # Used by the AI features that need to show table structure to the
      # model (e.g. RewriteQuery, IndexAdvisor, IndexPlanner, MigrationRisk,
      # PatternGrouper).
      #
      # Consolidates the ~10 lines of schema description logic that were
      # duplicated across 4 AI features in the Rails adapter's
      # app/controllers/concerns/sql_genius/ai_features.rb.
      class SchemaContextBuilder
        def initialize(connection)
          @connection = connection
        end

        # Returns a formatted multi-line string describing the given tables,
        # with one blank line between tables. Names the connection does not
        # list are skipped; the result is "" when nothing is described.
        #
        # tables - a table name or a list of names (nil is treated as empty)
        # detail:
        #   :basic            — name, row count, primary key, columns, indexes
        #   :with_cardinality — adds information_schema.STATISTICS cardinality per index
        def call(tables, detail: :basic)
          requested = Array(tables)
          return "" if requested.empty?

          # Fetched once per call instead of once per described table.
          known_tables = @connection.tables
          requested
            .filter_map { |table| describe_table(table, detail: detail) if known_tables.include?(table) }
            .join("\n\n")
        end

        private

        # Four labelled lines (table + row estimate, primary key, columns,
        # indexes), plus a cardinality line for :with_cardinality.
        def describe_table(table, detail:)
          cols = @connection.columns_for(table).map { |column| describe_column(column) }
          pk = @connection.primary_key(table)
          indexes = @connection.indexes_for(table).map do |idx|
            "#{"UNIQUE " if idx.unique}INDEX #{idx.name} (#{idx.columns.join(", ")})"
          end
          row_count = fetch_row_count(table)

          lines = [
            "Table: #{table} (~#{row_count || "unknown"} rows)",
            "Primary Key: #{pk || "NONE"}",
            "Columns: #{cols.join(", ")}",
            "Indexes: #{indexes.any? ? indexes.join(", ") : "NONE"}",
          ]
          lines << index_cardinality(table) if detail == :with_cardinality
          lines.join("\n")
        end

        # "name sql_type [NOT NULL] [DEFAULT value]"
        def describe_column(column)
          parts = ["#{column.name} #{column.sql_type}"]
          parts << "NOT NULL" unless column.null
          parts << "DEFAULT #{column.default}" if column.default
          parts.join(" ")
        end

        # Best-effort row estimate from information_schema; nil (shown as
        # "unknown") whenever the lookup raises.
        def fetch_row_count(table)
          sql = "SELECT TABLE_ROWS FROM information_schema.tables " \
                "WHERE table_schema = #{@connection.quote(@connection.current_database)} " \
                "AND table_name = #{@connection.quote(table)}"
          result = @connection.exec_query(sql)
          result.rows.first&.first
        rescue StandardError
          nil
        end

        # Best-effort per-column index cardinality; reports "(unavailable)"
        # whenever the lookup raises.
        def index_cardinality(table)
          sql = "SELECT INDEX_NAME, COLUMN_NAME, CARDINALITY, SEQ_IN_INDEX " \
                "FROM information_schema.STATISTICS " \
                "WHERE TABLE_SCHEMA = #{@connection.quote(@connection.current_database)} " \
                "AND TABLE_NAME = #{@connection.quote(table)} " \
                "ORDER BY INDEX_NAME, SEQ_IN_INDEX"
          result = @connection.exec_query(sql)
          stats = result.rows.map { |r| "#{r[0]}.#{r[1]}: cardinality=#{r[2]}" }
          "Cardinality: #{stats.join(", ")}"
        rescue StandardError
          "Cardinality: (unavailable)"
        end
      end
    end
  end
end