sql_genius 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +195 -0
- data/LICENSE.txt +65 -0
- data/README.md +178 -0
- data/Rakefile +8 -0
- data/app/controllers/concerns/sql_genius/ai_features.rb +332 -0
- data/app/controllers/concerns/sql_genius/database_analysis.rb +67 -0
- data/app/controllers/concerns/sql_genius/query_execution.rb +87 -0
- data/app/controllers/concerns/sql_genius/shared_view_helpers.rb +76 -0
- data/app/controllers/sql_genius/base_controller.rb +29 -0
- data/app/controllers/sql_genius/queries_controller.rb +94 -0
- data/app/views/layouts/sql_genius/application.html.erb +285 -0
- data/config/routes.rb +34 -0
- data/docs/guides/ai-features.md +115 -0
- data/docs/guides/getting-started-rails.md +118 -0
- data/docs/guides/ssh-tunnel-connections.md +151 -0
- data/docs/screenshots/ai_tools.png +0 -0
- data/docs/screenshots/dashboard.png +0 -0
- data/docs/screenshots/duplicate_indexes.png +0 -0
- data/docs/screenshots/query_explore.png +0 -0
- data/docs/screenshots/query_stats.png +0 -0
- data/docs/screenshots/server.png +0 -0
- data/docs/screenshots/table_sizes.png +0 -0
- data/lib/generators/sql_genius/install/install_generator.rb +19 -0
- data/lib/generators/sql_genius/install/templates/initializer.rb +56 -0
- data/lib/sql_genius/configuration.rb +114 -0
- data/lib/sql_genius/core/ai/client.rb +155 -0
- data/lib/sql_genius/core/ai/config.rb +47 -0
- data/lib/sql_genius/core/ai/connection_advisor.rb +96 -0
- data/lib/sql_genius/core/ai/describe_query.rb +41 -0
- data/lib/sql_genius/core/ai/dialect_hints.rb +35 -0
- data/lib/sql_genius/core/ai/index_advisor.rb +43 -0
- data/lib/sql_genius/core/ai/index_planner.rb +91 -0
- data/lib/sql_genius/core/ai/innodb_interpreter.rb +78 -0
- data/lib/sql_genius/core/ai/migration_risk.rb +51 -0
- data/lib/sql_genius/core/ai/optimization.rb +81 -0
- data/lib/sql_genius/core/ai/pattern_grouper.rb +94 -0
- data/lib/sql_genius/core/ai/rewrite_query.rb +51 -0
- data/lib/sql_genius/core/ai/schema_context_builder.rb +82 -0
- data/lib/sql_genius/core/ai/schema_review.rb +46 -0
- data/lib/sql_genius/core/ai/suggestion.rb +74 -0
- data/lib/sql_genius/core/ai/variable_reviewer.rb +113 -0
- data/lib/sql_genius/core/ai/workload_digest.rb +86 -0
- data/lib/sql_genius/core/analysis/columns.rb +63 -0
- data/lib/sql_genius/core/analysis/duplicate_indexes.rb +85 -0
- data/lib/sql_genius/core/analysis/query_history.rb +50 -0
- data/lib/sql_genius/core/analysis/query_stats.rb +76 -0
- data/lib/sql_genius/core/analysis/server_overview.rb +294 -0
- data/lib/sql_genius/core/analysis/stats_collector.rb +118 -0
- data/lib/sql_genius/core/analysis/stats_history.rb +42 -0
- data/lib/sql_genius/core/analysis/table_sizes.rb +52 -0
- data/lib/sql_genius/core/analysis/unused_indexes.rb +62 -0
- data/lib/sql_genius/core/column_definition.rb +30 -0
- data/lib/sql_genius/core/connection/active_record_adapter.rb +75 -0
- data/lib/sql_genius/core/connection/fake_adapter.rb +114 -0
- data/lib/sql_genius/core/connection.rb +37 -0
- data/lib/sql_genius/core/execution_result.rb +27 -0
- data/lib/sql_genius/core/index_definition.rb +23 -0
- data/lib/sql_genius/core/query_builders/mysql.rb +169 -0
- data/lib/sql_genius/core/query_builders/postgresql.rb +185 -0
- data/lib/sql_genius/core/query_builders.rb +27 -0
- data/lib/sql_genius/core/query_explainer.rb +113 -0
- data/lib/sql_genius/core/query_runner/config.rb +21 -0
- data/lib/sql_genius/core/query_runner.rb +123 -0
- data/lib/sql_genius/core/result.rb +43 -0
- data/lib/sql_genius/core/server_info.rb +54 -0
- data/lib/sql_genius/core/sql_validator.rb +149 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_shared_results.html.erb +59 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_ai_tools.html.erb +43 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_dashboard.html.erb +97 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_duplicate_indexes.html.erb +35 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_query_explorer.html.erb +110 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_query_stats.html.erb +43 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_server.html.erb +59 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_slow_queries.html.erb +17 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_table_sizes.html.erb +33 -0
- data/lib/sql_genius/core/views/sql_genius/queries/_tab_unused_indexes.html.erb +54 -0
- data/lib/sql_genius/core/views/sql_genius/queries/dashboard.html.erb +1826 -0
- data/lib/sql_genius/core/views/sql_genius/queries/query_detail.html.erb +465 -0
- data/lib/sql_genius/core.rb +72 -0
- data/lib/sql_genius/engine.rb +31 -0
- data/lib/sql_genius/slow_query_monitor.rb +43 -0
- data/lib/sql_genius/version.rb +5 -0
- data/lib/sql_genius.rb +29 -0
- data/sql_genius.gemspec +47 -0
- metadata +171 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Asks Core::Ai::Client for a plain-English walkthrough of a SQL statement.
#
# The system prompt is fixed instruction text plus config.domain_context;
# the SQL is forwarded verbatim as the user message. No database
# connection is consulted.
#
# Extracted from app/controllers/concerns/sql_genius/ai_features.rb
# in Phase 2a.
class DescribeQuery
  # client - object responding to #chat(messages:)
  # config - object responding to #domain_context
  def initialize(client, config)
    @client = client
    @config = config
  end

  # Sends the system prompt followed by +sql+ and returns whatever the
  # client's #chat returns.
  def call(sql)
    system_message = { role: "system", content: system_prompt }
    user_message = { role: "user", content: sql }
    @client.chat(messages: [system_message, user_message])
  end

  private

  # One prompt line per array entry; every line (including the last) is
  # newline-terminated in the final string.
  def system_prompt
    prompt_lines = [
      "You are a MySQL query explainer. Given a SQL query, explain in plain English:",
      "1. What the query does (tables involved, joins, filters, aggregations)",
      "2. How data flows through the query",
      "3. Any subtle behaviors (implicit type casts, NULL handling in NOT IN, DISTINCT effects, etc.)",
      "4. Potential performance concerns visible from the SQL structure alone",
      @config.domain_context.to_s,
      'Respond with JSON: {"explanation": "your plain-English explanation using markdown formatting"}',
    ]
    prompt_lines.map { |line| "#{line}\n" }.join
  end
end
|
|
39
|
+
end
|
|
40
|
+
end
|
|
41
|
+
end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Dialect-specific wording injected into AI system prompts so the model
# emits SQL for the right engine. Prompts that hardcode "MySQL" and ask
# for backticks produce broken SQL on PostgreSQL, which quotes
# identifiers with double quotes and has no backtick syntax.
module DialectHints
  extend self

  # Engine display name, suitable for prompts of the form
  # "You are a SQL assistant for a #{name_for} database."
  # Anything other than the :postgresql dialect is reported as
  # MySQL/MariaDB.
  def name_for(connection)
    return "PostgreSQL" if connection.server_version.dialect == :postgresql

    "MySQL/MariaDB"
  end

  # One-line identifier-quoting rule meant to be dropped into a numbered
  # Rules list in a prompt: double quotes on PostgreSQL, backticks
  # otherwise.
  def identifier_quoting_rule(connection)
    unless connection.server_version.postgresql?
      return "Use backticks (`col_name`) for table and column names."
    end

    'Use double quotes ("col_name") for case-sensitive identifiers; otherwise leave them bare.'
  end
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
end
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Asks the AI client for index recommendations for one query, given the
# query text, its EXPLAIN output, and schema context (with index
# cardinality) for the tables the query references.
class IndexAdvisor
  # client     - object responding to #chat(messages:)
  # config     - object responding to #domain_context
  # connection - handed to SqlValidator, SchemaContextBuilder and DialectHints
  def initialize(client, config, connection)
    @client = client
    @config = config
    @connection = connection
  end

  # sql          - the query to analyse
  # explain_rows - Array of row arrays, or an already formatted String
  #                (nil is treated as "no EXPLAIN output")
  #
  # Returns whatever the client's #chat returns.
  def call(sql, explain_rows)
    tables = SqlValidator.extract_table_references(sql, @connection)
    schema = SchemaContextBuilder.new(@connection).call(tables, detail: :with_cardinality)
    explain_text = format_explain(explain_rows)

    messages = [
      { role: "system", content: system_prompt },
      { role: "user", content: "Query:\n#{sql}\n\nEXPLAIN:\n#{explain_text}\n\nSchema:\n#{schema}" },
    ]
    @client.chat(messages: messages)
  end

  private

  # Renders EXPLAIN rows as "a | b | c" lines. A String is passed through
  # untouched, so this class accepts the same inputs as
  # Optimization#format_explain instead of raising NoMethodError on
  # String#map.
  def format_explain(explain_rows)
    return explain_rows if explain_rows.is_a?(String)

    Array(explain_rows).map { |row| Array(row).join(" | ") }.join("\n")
  end

  def system_prompt
    <<~PROMPT
      You are a #{DialectHints.name_for(@connection)} index advisor. Given a query, its EXPLAIN output, and current index/cardinality information, suggest optimal indexes. Consider:
      - Composite index column ordering (most selective first, or matching query order)
      - Covering indexes to avoid table lookups
      - Partial indexes for long string columns
      - Write-side costs (if this is a high-write table, note the INSERT/UPDATE overhead)
      - Whether existing indexes could be extended rather than creating new ones
      #{@config.domain_context}

      Respond with JSON: {"indexes": "markdown-formatted recommendations with exact CREATE INDEX statements, rationale for column ordering, and estimated impact. Include any indexes that should be DROPPED as part of the change."}
    PROMPT
  end
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Builds a consolidated index plan (drop / merge / keep / add) by sending
# schema context, unused-index and duplicate-index findings, and the
# current index listing of each target table to the AI client.
class IndexPlanner
  def initialize(client, config, connection)
    @client = client
    @config = config
    @connection = connection
  end

  # tables - optional table name or list of names; blank entries are
  #          dropped. When nothing usable is given, up to ten tables are
  #          picked automatically (see #top_tables_by_size).
  #
  # Returns the client's #chat result, or a canned "plan" Hash when there
  # are no tables to analyse.
  def call(tables = nil)
    target_tables = resolve_tables(tables)
    if target_tables.empty?
      return { "plan" => "No tables found to analyze." }
    end

    unused_indexes = Analysis::UnusedIndexes.new(@connection).call.indexes
    duplicate_indexes = Analysis::DuplicateIndexes.new(@connection, blocked_tables: []).call
    schema_text = SchemaContextBuilder.new(@connection).call(target_tables, detail: :with_cardinality)

    indexes_by_table = target_tables.each_with_object({}) do |table, listing|
      listing[table] = @connection.indexes_for(table).map { |index| index_label(index) }
    end

    system_message = { role: "system", content: system_prompt }
    user_message = {
      role: "user",
      content: user_prompt(schema_text, unused_indexes, duplicate_indexes, indexes_by_table),
    }
    @client.chat(messages: [system_message, user_message])
  end

  private

  # "UNIQUE INDEX name (a, b)" or "INDEX name (a, b)".
  def index_label(index)
    uniqueness = index.unique ? "UNIQUE " : ""
    "#{uniqueness}INDEX #{index.name} (#{index.columns.join(", ")})"
  end

  # Explicit table names win; otherwise fall back to automatic selection.
  def resolve_tables(tables)
    explicit = Array(tables).reject { |name| name.to_s.empty? }
    explicit.empty? ? top_tables_by_size : explicit
  end

  # The ten base tables with the largest data_length + index_length in
  # the current database. If that lookup raises for any reason, the first
  # ten entries of @connection.tables are used instead.
  def top_tables_by_size
    database = @connection.current_database
    query = [
      "SELECT table_name FROM information_schema.tables",
      "WHERE table_schema = #{@connection.quote(database)} AND table_type = 'BASE TABLE'",
      "ORDER BY (data_length + index_length) DESC LIMIT 10",
    ].join(" ")
    @connection.exec_query(query).rows.map { |row| row.first }
  rescue StandardError
    @connection.tables.first(10)
  end

  # Assembles the user message; the unused and duplicate sections are
  # omitted when there is nothing to report.
  def user_prompt(schema, unused, duplicates, index_map)
    sections = ["Schema with cardinality:\n#{schema}"]
    sections << unused_section(unused) if unused.any?
    sections << duplicates_section(duplicates) if duplicates.any?
    sections << current_indexes_section(index_map)
    sections.join("\n\n")
  end

  def unused_section(unused)
    rows = unused.map do |entry|
      "#{entry[:table]}.#{entry[:index_name]} (reads=#{entry[:reads]}, writes=#{entry[:writes]})"
    end
    "Unused indexes (zero reads):\n#{rows.join("\n")}"
  end

  def duplicates_section(duplicates)
    rows = duplicates.map do |dup|
      redundant = "#{dup[:duplicate_index]} (#{dup[:duplicate_columns].join(", ")})"
      covering = "#{dup[:covered_by_index]} (#{dup[:covered_by_columns].join(", ")})"
      "#{dup[:table]}: #{redundant} covered by #{covering}"
    end
    "Duplicate indexes:\n#{rows.join("\n")}"
  end

  def current_indexes_section(index_map)
    rows = index_map.map do |table, labels|
      listing = labels.any? ? labels.join("; ") : "NONE"
      "#{table}: #{listing}"
    end
    "Current indexes per table:\n#{rows.join("\n")}"
  end

  def system_prompt
    dialect = DialectHints.name_for(@connection)
    <<~PROMPT
      You are a #{dialect} index consolidation planner. Given schema information, unused indexes, duplicate indexes, and current index listings, produce a consolidated optimization plan. For each recommendation:
      - DROP redundant or unused indexes using #{dialect} syntax (e.g. `ALTER TABLE t DROP INDEX i;` on MySQL/MariaDB, `DROP INDEX "i";` on PostgreSQL)
      - MERGE overlapping indexes into composites where beneficial
      - KEEP indexes that are actively used and well-structured
      - ADD new composite indexes where query patterns suggest benefit
      - Provide rationale for each change and estimated impact on read/write performance
      #{@config.domain_context}
      Respond with JSON: {"plan": "markdown with dialect-appropriate DROP INDEX / CREATE INDEX statements, rationale for each change, and estimated impact"}
    PROMPT
  end
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Interprets SHOW ENGINE INNODB STATUS output in plain English.
# Combines the (truncated) raw InnoDB status text with the :innodb
# metrics reported by Analysis::ServerOverview so the LLM has both the
# raw report and summary numbers to work from.
class InnodbInterpreter
  # Upper bound, in characters, on the status text sent to the model.
  MAX_STATUS_LENGTH = 4000

  def initialize(client, config, connection)
    @client = client
    @config = config
    @connection = connection
  end

  # Returns the client's #chat result.
  #
  # Raises whatever Core::UnsupportedDialect.for_postgresql builds when
  # the connection reports a PostgreSQL server.
  def call
    if @connection.server_version.postgresql?
      raise Core::UnsupportedDialect.for_postgresql("InnoDB Health Interpreter")
    end

    status_text = fetch_innodb_status
    metrics = fetch_innodb_metrics

    messages = [
      { role: "system", content: system_prompt },
      { role: "user", content: user_prompt(status_text, metrics) },
    ]
    @client.chat(messages: messages)
  end

  private

  # Third column of the first result row, cut to MAX_STATUS_LENGTH.
  # Slicing a shorter string returns it whole, so no length check is
  # needed; an empty result yields "".
  def fetch_innodb_status
    result = @connection.exec_query("SHOW ENGINE INNODB STATUS")
    result.rows.first&.dig(2).to_s[0, MAX_STATUS_LENGTH]
  end

  # The :innodb section of the server overview. Falls back to an empty
  # Hash when the section is absent so the prompt renders blank values
  # instead of raising NoMethodError on nil.
  def fetch_innodb_metrics
    overview = Analysis::ServerOverview.new(@connection).call
    overview[:innodb] || {}
  end

  def system_prompt
    <<~PROMPT
      You are a MySQL InnoDB internals expert. Analyze the SHOW ENGINE INNODB STATUS output and the supplementary metrics below. Provide a plain-English interpretation organized by these sections:
      - **Deadlocks**: recent deadlock information, lock wait chains, affected transactions
      - **Transaction History**: history list length, purge lag, long-running transactions
      - **Buffer Pool**: hit rate, dirty page ratio, free pages, eviction pressure
      - **I/O**: pending reads/writes, log sequence numbers, checkpoint age, log sizing adequacy
      - **Semaphores**: mutex/rw-lock waits, spin rounds, OS waits indicating contention

      For each section, explain what the numbers mean in practical terms and recommend specific actions if problems are detected.
      #{@config.domain_context}
      Respond with JSON: {"findings": "markdown analysis organized by: Deadlocks, Transaction History, Buffer Pool, I/O, Semaphores. Each section should include current state assessment, risk level, and actionable recommendations."}
    PROMPT
  end

  # Raw status text followed by a labelled summary of the metrics Hash.
  def user_prompt(status_text, metrics)
    [
      "== SHOW ENGINE INNODB STATUS ==",
      status_text,
      "",
      "== InnoDB Metrics Summary ==",
      "Buffer Pool Size: #{metrics[:buffer_pool_mb]} MB",
      "Buffer Pool Hit Rate: #{metrics[:buffer_pool_hit_rate]}%",
      "Dirty Pages: #{metrics[:buffer_pool_pages_dirty]}",
      "Free Pages: #{metrics[:buffer_pool_pages_free]}",
      "Total Pages: #{metrics[:buffer_pool_pages_total]}",
      "Row Lock Waits: #{metrics[:row_lock_waits]}",
      "Row Lock Time (ms): #{metrics[:row_lock_time_ms]}",
    ].join("\n")
  end
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Asks the AI client to assess the operational risk of a Rails migration
# or raw DDL, giving it basic schema context for every table the
# migration appears to touch.
class MigrationRisk
  # Rails schema helpers whose first argument is the table name.
  RAILS_HELPERS = %w[
    create_table drop_table rename_table change_table
    add_column remove_column rename_column
    change_column change_column_null change_column_default
    add_index remove_index rename_index
    add_reference remove_reference
    add_foreign_key remove_foreign_key
    add_timestamps remove_timestamps
  ].freeze

  # helper(:table, ...) or helper :table, ... — the name may be a symbol,
  # a quoted string, or bare.
  HELPER_CALL = /(?:#{RAILS_HELPERS.join("|")})(?:\s*\(\s*|\s+)[:"'`]?(\w+)/i

  # ALTER/CREATE/DROP/RENAME TABLE and CREATE [UNIQUE] INDEX ... ON, with
  # optional ONLY, IF [NOT] EXISTS, a schema qualifier, and backtick or
  # double-quote identifier quoting.
  SQL_DDL = /(?:(?:ALTER|CREATE|DROP|RENAME)\s+(?:TEMPORARY\s+)?TABLE|CREATE\s+(?:UNIQUE\s+)?INDEX\b[^;]*?\bON)\s+(?:ONLY\s+)?(?:IF\s+(?:NOT\s+)?EXISTS\s+)?(?:[`"]?\w+[`"]?\.)?[`"]?(\w+)/i

  TABLE_REFERENCE = Regexp.union(HELPER_CALL, SQL_DDL)

  private_constant :RAILS_HELPERS, :HELPER_CALL, :SQL_DDL, :TABLE_REFERENCE

  def initialize(client, config, connection)
    @client = client
    @config = config
    @connection = connection
  end

  # migration_sql - Rails migration source or raw DDL text.
  #
  # Returns the client's #chat result.
  def call(migration_sql)
    tables = extract_table_names(migration_sql)
    schema = SchemaContextBuilder.new(@connection).call(tables, detail: :basic)
    # An empty description means no referenced table could be described.
    schema_text = schema.to_s.empty? ? "Could not determine" : schema

    messages = [
      { role: "system", content: system_prompt },
      { role: "user", content: "Migration:\n#{migration_sql}\n\nAffected Tables:\n#{schema_text}" },
    ]
    @client.chat(messages: messages)
  end

  private

  # Candidate table names mentioned by the migration, de-duplicated and
  # in order of first appearance. Matching is purely textual, so the list
  # may contain names that are not real tables.
  def extract_table_names(migration_sql)
    migration_sql.scan(TABLE_REFERENCE).flatten.compact.uniq
  end

  def system_prompt
    <<~PROMPT
      You are a #{DialectHints.name_for(@connection)} migration risk assessor. Given a Rails migration or DDL, evaluate:
      1. Will this lock the table? For how long given the row count?
      2. Is this safe to run during traffic, or does it need a maintenance window?
      3. Should pt-online-schema-change or gh-ost be used instead?
      4. Will it break or degrade any of the active queries against this table?
      5. Are there any data loss risks?
      6. What is the recommended deployment strategy?
      #{@config.domain_context}

      Respond with JSON: {"risk_level": "low|medium|high|critical", "assessment": "markdown-formatted risk assessment with specific recommendations and estimated lock duration"}
    PROMPT
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Analyses a SQL query + its EXPLAIN output and asks the AI client
# for optimization suggestions.
#
# Construct with:
#   connection - a Core::Connection implementation
#   client     - a Core::Ai::Client
#   config     - the Core::Ai::Config
#
# Call:
#   .call(sql, explain_rows, allowed_tables)
#     explain_rows   - Array of arrays OR a pre-formatted String
#     allowed_tables - table names to describe in the prompt; names the
#                      connection does not list are skipped, nil means none
#     -> whatever the client's #chat returns
#
# NOTE(review): @config is stored but not read by this class, so
# config.domain_context is not part of this prompt — confirm whether
# that is intentional.
class Optimization
  def initialize(connection, client, config)
    @connection = connection
    @client = client
    @config = config
  end

  def call(sql, explain_rows, allowed_tables)
    schema = build_schema_description(allowed_tables)
    messages = [
      { role: "system", content: system_prompt(schema) },
      { role: "user", content: user_prompt(sql, explain_rows) },
    ]

    @client.chat(messages: messages)
  end

  private

  def system_prompt(schema_description)
    <<~PROMPT
      You are a #{DialectHints.name_for(@connection)} query optimization expert. Given a SQL query and its EXPLAIN output, analyze the query execution plan and provide actionable optimization suggestions.

      Available schema:
      #{schema_description}

      Respond with JSON:
      {
      "suggestions": "Markdown-formatted analysis and suggestions. Include: 1) Summary of current execution plan (scan types, rows examined). 2) Specific recommendations such as indexes to add (provide exact CREATE INDEX statements), query rewrites, or structural changes. 3) Expected impact of each suggestion."
      }
    PROMPT
  end

  def user_prompt(sql, explain_rows)
    <<~PROMPT
      SQL Query:
      #{sql}

      EXPLAIN Output:
      #{format_explain(explain_rows)}
    PROMPT
  end

  # Strings are passed through; row arrays become "a | b | c" lines.
  def format_explain(explain_rows)
    return explain_rows if explain_rows.is_a?(String)

    explain_rows.map { |row| row.join(" | ") }.join("\n")
  end

  # One "table: col (type), ..." line per known table, followed by an
  # "Indexes:" line when the table has any.
  def build_schema_description(allowed_tables)
    requested = Array(allowed_tables)
    return "" if requested.empty?

    # Fetched once up front rather than once per requested table.
    known_tables = @connection.tables

    requested.filter_map do |table|
      next unless known_tables.include?(table)

      columns = @connection.columns_for(table).map { |c| "#{c.name} (#{c.type})" }
      indexes = @connection.indexes_for(table).map do |idx|
        "#{idx.name}: [#{idx.columns.join(", ")}]#{" UNIQUE" if idx.unique}"
      end
      desc = "#{table}: #{columns.join(", ")}"
      desc += "\n Indexes: #{indexes.join("; ")}" if indexes.any?
      desc
    end.join("\n")
  end
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Clusters expensive queries by shared root cause so that one fix can
# improve several queries at once. Takes the top statements reported by
# Analysis::QueryStats, keeps the high-cost ones, gathers schema context
# for the tables they reference, and asks the LLM to group them.
#
# Construct with:
#   connection - a Core::Connection implementation
#   client     - a Core::Ai::Client
#   config     - the Core::Ai::Config
#
# Call:
#   .call() -> the client's #chat result (expected to carry a "groups"
#              key), or a canned "groups" Hash when nothing is high-cost
class PatternGrouper
  # Number of statements requested from QueryStats (sorted by total_time).
  QUERY_LIMIT = 30
  # A statement is high-cost when rows_ratio exceeds this ...
  ROWS_RATIO_THRESHOLD = 10
  # ... or avg_time_ms exceeds this.
  AVG_TIME_THRESHOLD = 50

  def initialize(connection, client, config)
    @connection = connection
    @client = client
    @config = config
  end

  def call
    candidates = Analysis::QueryStats.new(@connection).call(sort: "total_time", limit: QUERY_LIMIT)
    high_cost = candidates.select { |stat| high_cost?(stat) }
    if high_cost.empty?
      return { "groups" => "No high-cost queries found to analyze." }
    end

    tables = extract_tables(high_cost)
    schema =
      if tables.any?
        SchemaContextBuilder.new(@connection).call(tables, detail: :basic)
      else
        ""
      end

    system_message = { role: "system", content: system_prompt }
    user_message = { role: "user", content: user_prompt(high_cost, schema) }
    @client.chat(messages: [system_message, user_message])
  end

  private

  def high_cost?(stat)
    stat[:rows_ratio] > ROWS_RATIO_THRESHOLD || stat[:avg_time_ms] > AVG_TIME_THRESHOLD
  end

  # Unique table names referenced across all given statements.
  def extract_tables(stats)
    stats.flat_map { |stat| SqlValidator.extract_table_references(stat[:sql], @connection) }.uniq
  end

  # Role line, optional domain-context section, then the task description.
  def system_prompt
    sections = [role_section]
    context = @config.domain_context
    sections << domain_section(context) if context && !context.empty?
    sections << task_section
    sections.join
  end

  def role_section
    "You are a #{DialectHints.name_for(@connection)} performance analyst specializing in root-cause analysis.\n"
  end

  def domain_section(context)
    "\nDomain context:\n#{context}\n"
  end

  def task_section
    <<~PROMPT

      Given a set of high-cost queries and the schema they reference, group them by shared root cause.
      For each group provide:
      1. The shared root cause (e.g., missing index, full table scan, implicit type conversion)
      2. The affected queries (numbered)
      3. A single fix that addresses all queries in the group (with exact SQL: CREATE INDEX, ALTER TABLE, etc.)
      4. Estimated performance impact

      Respond with JSON: {"groups": "markdown with each group showing: the shared root cause, affected queries (numbered), the single fix that addresses all of them (with exact SQL), and estimated impact"}
    PROMPT
  end

  # Numbered list of statements with their metrics, plus schema context
  # when there is any.
  def user_prompt(stats, schema)
    entries = stats.each_with_index.map do |stat, index|
      metrics = %i[calls avg_time_ms rows_ratio rows_examined tmp_disk_tables]
                .map { |key| "#{key}=#{stat[key]}" }
                .join(", ")
      "#{index + 1}. SQL: #{stat[:sql]}\n #{metrics}"
    end

    heading = "High-cost queries (rows_ratio > #{ROWS_RATIO_THRESHOLD} OR avg_time_ms > #{AVG_TIME_THRESHOLD}):"
    sections = ["#{heading}\n\n#{entries.join("\n")}"]
    sections << "Schema context:\n#{schema}" unless schema.empty?
    sections.join("\n\n")
  end
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
end
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Asks the AI client for an improved version of a SQL query, giving it
# basic schema context for every table the query references.
class RewriteQuery
  def initialize(client, config, connection)
    @client = client
    @config = config
    @connection = connection
  end

  # Returns the client's #chat result for the given SQL text.
  def call(sql)
    referenced = SqlValidator.extract_table_references(sql, @connection)
    schema_text = SchemaContextBuilder.new(@connection).call(referenced, detail: :basic)

    @client.chat(
      messages: [
        { role: "system", content: system_prompt(schema_text) },
        { role: "user", content: sql },
      ],
    )
  end

  private

  # Anti-patterns the model is asked to look for, as a bulleted list.
  def anti_pattern_checklist
    [
      "SELECT * when specific columns would suffice",
      "Correlated subqueries that could be JOINs",
      "OR conditions preventing index use (suggest UNION ALL)",
      "LIKE '%prefix' patterns (leading wildcard)",
      "Implicit type conversions in WHERE clauses",
      "NOT IN with NULLable columns (suggest NOT EXISTS)",
      "ORDER BY on non-indexed columns with LIMIT",
      "Unnecessary DISTINCT",
      "Functions on indexed columns in WHERE (e.g., DATE(created_at) instead of range)",
    ].map { |item| "- #{item}" }.join("\n")
  end

  def system_prompt(schema)
    dialect = DialectHints.name_for(@connection)
    <<~PROMPT
      You are a #{dialect} query rewrite expert. Analyze the SQL for anti-patterns and suggest a rewritten version. Look for:
      #{anti_pattern_checklist}

      Available schema:
      #{schema}
      #{@config.domain_context}

      Respond with JSON: {"original": "the original SQL", "rewritten": "the improved SQL", "changes": "markdown list of each change and why it helps"}
    PROMPT
  end
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlGenius
|
|
4
|
+
module Core
|
|
5
|
+
module Ai
|
|
6
|
+
# Builds formatted schema-description strings for AI prompt context.
# Used by SchemaReview, RewriteQuery, IndexAdvisor, and MigrationRisk.
#
# Consolidates the ~10 lines of schema description logic that were
# duplicated across 4 AI features in the Rails adapter's
# app/controllers/concerns/sql_genius/ai_features.rb.
class SchemaContextBuilder
  def initialize(connection)
    @connection = connection
  end

  # Returns a formatted multi-line string describing the given tables,
  # one blank-line-separated section per table. Names the connection
  # does not list are skipped; nil or an empty list yields "".
  #
  # detail:
  #   :basic            — name, row count, primary key, columns, indexes
  #   :with_cardinality — adds information_schema.STATISTICS cardinality per index
  def call(tables, detail: :basic)
    requested = Array(tables)
    return "" if requested.empty?

    # Fetched once per call rather than once per requested table.
    known_tables = @connection.tables

    requested
      .select { |table| known_tables.include?(table) }
      .map { |table| describe_table(table, detail: detail) }
      .join("\n\n")
  end

  private

  # Section for one table that is already known to exist.
  def describe_table(table, detail:)
    cols = @connection.columns_for(table).map do |c|
      parts = ["#{c.name} #{c.sql_type}"]
      parts << "NOT NULL" unless c.null
      parts << "DEFAULT #{c.default}" if c.default
      parts.join(" ")
    end

    pk = @connection.primary_key(table)
    indexes = @connection.indexes_for(table).map do |idx|
      "#{"UNIQUE " if idx.unique}INDEX #{idx.name} (#{idx.columns.join(", ")})"
    end

    row_count = fetch_row_count(table)

    parts = [
      "Table: #{table} (~#{row_count || "unknown"} rows)",
      "Primary Key: #{pk || "NONE"}",
      "Columns: #{cols.join(", ")}",
      "Indexes: #{indexes.any? ? indexes.join(", ") : "NONE"}",
    ]

    parts << index_cardinality(table) if detail == :with_cardinality

    parts.join("\n")
  end

  # TABLE_ROWS from information_schema.tables for the current database,
  # or nil when the row is missing or the lookup raises (best-effort).
  def fetch_row_count(table)
    sql = "SELECT TABLE_ROWS FROM information_schema.tables " \
          "WHERE table_schema = #{@connection.quote(@connection.current_database)} " \
          "AND table_name = #{@connection.quote(table)}"
    result = @connection.exec_query(sql)
    result.rows.first&.first
  rescue StandardError
    nil
  end

  # "Cardinality: idx.col: cardinality=N, ..." built from
  # information_schema.STATISTICS; reports "(unavailable)" when the
  # lookup raises (best-effort).
  def index_cardinality(table)
    sql = "SELECT INDEX_NAME, COLUMN_NAME, CARDINALITY, SEQ_IN_INDEX " \
          "FROM information_schema.STATISTICS " \
          "WHERE TABLE_SCHEMA = #{@connection.quote(@connection.current_database)} " \
          "AND TABLE_NAME = #{@connection.quote(table)} " \
          "ORDER BY INDEX_NAME, SEQ_IN_INDEX"
    result = @connection.exec_query(sql)
    stats = result.rows.map { |r| "#{r[0]}.#{r[1]}: cardinality=#{r[2]}" }
    "Cardinality: #{stats.join(", ")}"
  rescue StandardError
    "Cardinality: (unavailable)"
  end
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|