sql-chatbot-rails 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +20 -0
- data/app/controllers/sql_chatbot/chatbot_controller.rb +158 -0
- data/config/routes.rb +11 -0
- data/lib/generators/sql_chatbot/install_generator.rb +25 -0
- data/lib/generators/sql_chatbot/templates/initializer.rb +22 -0
- data/lib/sql_chatbot/auth/cors.rb +35 -0
- data/lib/sql_chatbot/auth/jwt.rb +34 -0
- data/lib/sql_chatbot/configuration.rb +58 -0
- data/lib/sql_chatbot/engine.rb +23 -0
- data/lib/sql_chatbot/grammar/count_renderer.rb +113 -0
- data/lib/sql_chatbot/grammar/entity_candidates.rb +210 -0
- data/lib/sql_chatbot/grammar/intent_extractor.rb +191 -0
- data/lib/sql_chatbot/grammar/list_renderer.rb +50 -0
- data/lib/sql_chatbot/grammar/miss_logger.rb +17 -0
- data/lib/sql_chatbot/grammar/modifiers.rb +145 -0
- data/lib/sql_chatbot/grammar/primitives.rb +69 -0
- data/lib/sql_chatbot/grammar/programmatic_renderer.rb +258 -0
- data/lib/sql_chatbot/grammar/registry.rb +66 -0
- data/lib/sql_chatbot/grammar/sanity_check.rb +37 -0
- data/lib/sql_chatbot/grammar/template_compiler.rb +179 -0
- data/lib/sql_chatbot/llm/client.rb +87 -0
- data/lib/sql_chatbot/prompts/answer.rb +157 -0
- data/lib/sql_chatbot/prompts/classify.rb +59 -0
- data/lib/sql_chatbot/prompts/generate_sql.rb +88 -0
- data/lib/sql_chatbot/services/code_indexer.rb +337 -0
- data/lib/sql_chatbot/services/grammar_pipeline.rb +45 -0
- data/lib/sql_chatbot/services/model_introspector.rb +152 -0
- data/lib/sql_chatbot/services/orchestrator.rb +635 -0
- data/lib/sql_chatbot/services/registry_builder.rb +385 -0
- data/lib/sql_chatbot/services/route_introspector.rb +118 -0
- data/lib/sql_chatbot/services/schema_service.rb +884 -0
- data/lib/sql_chatbot/services/sql_executor.rb +81 -0
- data/lib/sql_chatbot/version.rb +5 -0
- data/lib/sql_chatbot_rails.rb +91 -0
- data/vendor/assets/widget.js +53 -0
- metadata +180 -0
|
@@ -0,0 +1,635 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "sql_chatbot/prompts/classify"
|
|
5
|
+
require "sql_chatbot/prompts/generate_sql"
|
|
6
|
+
require "sql_chatbot/prompts/answer"
|
|
7
|
+
require "sql_chatbot/services/sql_executor"
|
|
8
|
+
require "sql_chatbot/services/grammar_pipeline"
|
|
9
|
+
require "sql_chatbot/grammar/miss_logger"
|
|
10
|
+
require "sql_chatbot/grammar/sanity_check"
|
|
11
|
+
require "sql_chatbot/grammar/list_renderer"
|
|
12
|
+
require "sql_chatbot/grammar/count_renderer"
|
|
13
|
+
require "sql_chatbot/grammar/programmatic_renderer"
|
|
14
|
+
|
|
15
|
+
module SqlChatbot
|
|
16
|
+
module Services
|
|
17
|
+
class Orchestrator
|
|
18
|
+
# Question categories accepted from the classifier; parse_classification
# coerces any other value to "data".
VALID_TYPES = %w[data data_with_code code navigation guidance greeting unsafe].freeze
|
|
19
|
+
|
|
20
|
+
# Wires the orchestrator to its collaborators.
#
# @param llm_client [#call, #stream] client used for every LLM request
# @param schema_service [Object] schema lookups (table names, hints, enum context)
# @param code_indexer [Object] code search index; also a source of routes
# @param route_introspector_data [Array<Hash>, nil] optional pre-computed route entries
def initialize(llm_client:, schema_service:, code_indexer:, route_introspector_data: nil)
  @llm, @schema, @code_indexer = llm_client, schema_service, code_indexer
  @route_introspector_data = route_introspector_data
  # No manifest is known until set_manifest accepts one.
  @manifest = nil
end
|
|
27
|
+
|
|
28
|
+
# Stores the widget manifest when its version is supported (version 1).
#
# The version may be given under a string or symbol key. Because
# build_route_list reads the manifest with string keys ("routes", "path",
# "label", ...), a plain Hash is normalised to string keys via a JSON
# round-trip so that symbol-keyed manifests do not silently lose their routes.
# Non-Hash containers are stored untouched.
#
# @param manifest [Hash, nil] manifest payload
# @return [Object, nil] the stored manifest, or nil when it was rejected
def set_manifest(manifest)
  # A nil manifest is treated like an unsupported version instead of raising.
  version = manifest && (manifest["version"] || manifest[:version])
  unless version == 1
    warn "[SqlChatbot] Unsupported manifest version: #{version}"
    return
  end

  @manifest = manifest.is_a?(Hash) ? JSON.parse(JSON.generate(manifest)) : manifest
end
|
|
36
|
+
|
|
37
|
+
# Public entry point for the route listing produced by the private
# build_route_list helper.
#
# @return [String] the value returned by build_route_list
def route_list
  build_route_list
end
|
|
40
|
+
|
|
41
|
+
# Answers one question as a lazy stream of SSE event hashes.
#
# Sequence: "classifying" -> LLM classification -> "classified" -> the handler
# for the classified type -> "done". Handlers in this class emit "sql",
# "executing", "token", "error", "grammar_matched" and "grammar_fallback".
# Any StandardError raised along the way is logged and reported as a single
# "error" event carrying a user-friendly message (no "done" follows it).
#
# @param question [String] the user's question
# @param page_context [Object, nil] passed through to the prompts
# @param history [Array] prior conversation turns passed through to the prompts
# @return [Enumerator] yields event hashes
def handle_question(question:, page_context: nil, history: [])
  Enumerator.new do |yielder|
    begin
      yielder.yield({ type: "classifying" })

      prompt = Prompts::Classify.build_messages(
        question: question,
        schema_summary: @schema.table_names,
        page_context: page_context,
        history: history
      )
      classification = parse_classification(@llm.call(prompt, json_mode: true))
      kind = classification[:type]

      yielder.yield({ type: "classified", questionType: kind, confidence: classification[:confidence] })

      # Dispatch on the classified type.
      if %w[data data_with_code].include?(kind)
        handle_data_with_code(yielder, question, classification, page_context, history)
      elsif kind == "code"
        handle_code(yielder, question, classification, history)
      elsif %w[navigation guidance].include?(kind)
        handle_navigation(yielder, question, kind, page_context, history)
      elsif kind == "greeting"
        handle_greeting(yielder, question, history)
      elsif kind == "unsafe"
        handle_unsafe(yielder)
      end

      yielder.yield({ type: "done" })
    rescue => failure
      log_error(failure)
      yielder.yield({ type: "error", message: friendly_error_message(failure) })
    end
  end
end
|
|
87
|
+
|
|
88
|
+
private
|
|
89
|
+
|
|
90
|
+
# ============================================================
|
|
91
|
+
# Route handlers
|
|
92
|
+
# ============================================================
|
|
93
|
+
|
|
94
|
+
# Answers a data question.
#
# 1. Tries the grammar path; when it reports :handled nothing else runs.
# 2. Otherwise asks the LLM for SQL, validates it and executes it.
# 3. On ActiveRecord::StatementInvalid, recovers in order:
#    a) programmatic column-name fix (column errors only),
#    b) one LLM retry that is shown the database error,
#    c) a graceful "please rephrase" token (raw database errors never reach the user).
# 4. Renders empty-shaped results programmatically, otherwise streams the LLM answer.
#
# The SQL that actually produced the rows is tracked in `executed_sql`, so the
# empty-result guard and the answer prompt describe the corrected query rather
# than the one that failed.
#
# @param yielder [#yield] sink for event hashes
# @param question [String]
# @param classification [Hash] uses :searchTerms
# @param page_context [Object, nil]
# @param history [Array]
# @return [void]
def handle_data_with_code(yielder, question, classification, page_context, history)
  # Grammar-first path (before LLM SQL generation).
  return if try_grammar_path(yielder, question, history) == :handled

  graceful = "I couldn't answer that one — could you rephrase or be more specific?"

  # Search the code index for context.
  search_terms = classification[:searchTerms] || []
  code_results = search_terms.empty? ? [] : @code_indexer.search(search_terms)
  code_context = format_code_context(code_results)
  code_snippets = to_code_snippets(code_results)

  question_type = code_context.empty? ? "data" : "data_with_code"

  lookup_hints = @schema.find_lookup_hints(question)
  # Only the schema tables relevant to the search terms are sent to the LLM.
  selected_schema = @schema.select_schema(search_terms)

  gen_messages = Prompts::GenerateSql.build_messages(
    question: question,
    schema: selected_schema,
    code_context: code_context.empty? ? nil : code_context,
    lookup_hints: lookup_hints.empty? ? nil : lookup_hints,
    history: history
  )
  raw_sql = @llm.call(gen_messages, json_mode: true)
  parsed = parse_sql_generation(raw_sql)

  if parsed[:sql].empty?
    yielder.yield({ type: "error", message: "Failed to generate SQL" })
    return
  end

  yielder.yield({ type: "sql", query: parsed[:sql], explanation: parsed[:explanation] })

  validation = SqlExecutor.validate_sql(parsed[:sql])
  unless validation[:valid]
    yielder.yield({ type: "error", message: "SQL validation failed: #{validation[:reason]}" })
    return
  end

  yielder.yield({ type: "executing" })

  executed_sql = validation[:sql]
  result = nil
  begin
    result = SqlExecutor.execute_sql(executed_sql)
  rescue ActiveRecord::StatementInvalid => e
    log_error(e)

    # Strategy 1: programmatic column fix (only for column errors).
    if column_error?(e.message)
      fixed_sql = try_fix_column(e.message, validation[:sql], selected_schema)
      if fixed_sql
        begin
          fixed_validation = SqlExecutor.validate_sql(fixed_sql)
          if fixed_validation[:valid]
            yielder.yield({ type: "sql", query: fixed_sql, explanation: "Auto-corrected column name" })
            result = SqlExecutor.execute_sql(fixed_validation[:sql])
            executed_sql = fixed_validation[:sql] if result
          end
        rescue => fix_error
          # Best effort: record the failure and fall through to the LLM retry.
          log_error(fix_error)
        end
      end
    end

    # Strategy 2: LLM retry for any statement error, not just column ones.
    unless result
      error_hint = column_error?(e.message) ? build_column_hint(e.message, selected_schema) : ""
      retry_messages = gen_messages + [
        { role: "assistant", content: raw_sql },
        { role: "user", content: "The SQL query failed with this error:\n#{e.message}\n\n#{error_hint}Please fix the SQL." }
      ]
      begin
        retry_sql = @llm.call(retry_messages, json_mode: true)
        retry_parsed = parse_sql_generation(retry_sql)
        unless retry_parsed[:sql].empty?
          retry_validation = SqlExecutor.validate_sql(retry_parsed[:sql])
          if retry_validation[:valid]
            yielder.yield({ type: "sql", query: retry_parsed[:sql], explanation: "Corrected: #{retry_parsed[:explanation]}" })
            result = SqlExecutor.execute_sql(retry_validation[:sql])
            executed_sql = retry_validation[:sql] if result
          end
        end
      rescue => retry_error
        # The LLM call or the retried query failed: record it and fall
        # through to the graceful message.
        log_error(retry_error)
      end
    end

    # Strategy 3: graceful message. Never render the raw database error.
    unless result
      yielder.yield({ type: "token", content: graceful })
      return
    end
  rescue => e
    log_error(e)
    yielder.yield({ type: "token", content: graceful })
    return
  end

  # Empty-result guard: bypass the answer LLM for empty-shaped results
  # (zero counts, NULL aggregates). The renderer returns nil when there is
  # data, in which case the LLM answers as usual.
  guarded = SqlChatbot::Grammar::ProgrammaticRenderer.render_empty_for_llm_sql(
    question, executed_sql, result[:rows]
  )
  if guarded
    yielder.yield({ type: "token", content: guarded })
    return
  end

  # Enum context from the selected schema lets the answer translate enum values.
  enum_context = @schema.extract_enum_context(selected_schema)

  answer_messages = Prompts::Answer.build_messages(
    question: question,
    type: question_type,
    sql_result: result[:rows],
    sql_query: executed_sql,
    code_snippets: code_snippets.empty? ? nil : code_snippets,
    page_context: page_context,
    history: history,
    enum_context: enum_context.empty? ? nil : enum_context
  )

  @llm.stream(answer_messages) do |chunk|
    yielder.yield({ type: "token", content: chunk })
  end
end
|
|
231
|
+
|
|
232
|
+
# Answers a question about the code base.
#
# Searches the code index with the classifier's search terms. With no hits
# (or no terms) a fixed "couldn't find" token is emitted; otherwise the
# matching snippets are handed to the answer prompt and the LLM reply is
# streamed as "token" events.
#
# @param yielder [#yield] sink for event hashes
# @param question [String]
# @param classification [Hash] uses :searchTerms
# @param history [Array]
def handle_code(yielder, question, classification, history)
  terms = classification[:searchTerms] || []
  matches = terms.empty? ? [] : @code_indexer.search(terms)

  if matches.empty?
    yielder.yield({ type: "token", content: "I couldn't find relevant code for that question." })
    return
  end

  prompt = Prompts::Answer.build_messages(
    question: question,
    type: "code",
    code_snippets: to_code_snippets(matches),
    history: history
  )

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
|
|
254
|
+
|
|
255
|
+
# Answers a navigation or guidance question by giving the answer prompt the
# merged route listing and streaming the LLM reply as "token" events.
#
# @param yielder [#yield] sink for event hashes
# @param question [String]
# @param type [String] the classified type, forwarded to the prompt
# @param page_context [Object, nil]
# @param history [Array]
def handle_navigation(yielder, question, type, page_context, history)
  prompt_args = {
    question: question,
    type: type,
    page_context: page_context,
    route_list: build_route_list,
    history: history
  }
  prompt = Prompts::Answer.build_messages(**prompt_args)

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
|
|
270
|
+
|
|
271
|
+
# Replies to a greeting: builds a "greeting" answer prompt and streams the
# LLM reply as "token" events.
#
# @param yielder [#yield] sink for event hashes
# @param question [String]
# @param history [Array]
def handle_greeting(yielder, question, history)
  prompt = Prompts::Answer.build_messages(question: question, type: "greeting", history: history)

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
|
|
282
|
+
|
|
283
|
+
# Refuses an unsafe request with a fixed message; no LLM call is made.
#
# @param yielder [#yield] sink for event hashes
def handle_unsafe(yielder)
  refusal = { type: "token", content: "I can't help with that request." }
  yielder.yield(refusal)
end
|
|
286
|
+
|
|
287
|
+
# ============================================================
|
|
288
|
+
# Parsing helpers
|
|
289
|
+
# ============================================================
|
|
290
|
+
|
|
291
|
+
# Parses the classifier's JSON reply into a normalised hash.
#
# Anything that is not a JSON object (invalid JSON, nil input, or a JSON
# array/scalar/null) yields the defaults instead of raising, so a malformed
# LLM reply degrades to a plain "data" question.
#
# @param raw [String, nil] raw LLM output
# @return [Hash] { type: String (one of VALID_TYPES, default "data"),
#   confidence: Numeric (default 0.5), searchTerms: Array (default []) }
def parse_classification(raw)
  parsed =
    begin
      JSON.parse(raw, symbolize_names: true)
    rescue JSON::ParserError, TypeError
      # TypeError covers non-String input such as nil.
      nil
    end
  # Valid JSON that is not an object carries no usable fields.
  parsed = {} unless parsed.is_a?(Hash)

  {
    type: VALID_TYPES.include?(parsed[:type]) ? parsed[:type] : "data",
    confidence: parsed[:confidence].is_a?(Numeric) ? parsed[:confidence] : 0.5,
    searchTerms: parsed[:searchTerms].is_a?(Array) ? parsed[:searchTerms] : []
  }
end
|
|
303
|
+
|
|
304
|
+
# Parses the SQL-generation JSON reply.
#
# Invalid JSON, nil input and non-object JSON all yield an empty result
# instead of raising. SQL that is missing, not a String, or only whitespace
# is reported as "" so callers can rely on `sql.empty?`.
#
# @param raw [String, nil] raw LLM output
# @return [Hash] { sql: String, explanation: String }
def parse_sql_generation(raw)
  parsed =
    begin
      JSON.parse(raw, symbolize_names: true)
    rescue JSON::ParserError, TypeError
      # TypeError covers non-String input such as nil.
      nil
    end
  parsed = {} unless parsed.is_a?(Hash)

  sql = parsed[:sql]
  sql = "" unless sql.is_a?(String) && !sql.strip.empty?

  { sql: sql, explanation: (parsed[:explanation] || "").to_s }
end
|
|
312
|
+
|
|
313
|
+
# ============================================================
|
|
314
|
+
# Formatting helpers
|
|
315
|
+
# ============================================================
|
|
316
|
+
|
|
317
|
+
# Renders code search hits as one text blob for the SQL-generation prompt:
# a "File: <path>" header followed by the content, with a blank line
# between hits.
#
# @param results [Array<Hash>] hits with :file and :content
# @return [String] "" when there are no hits
def format_code_context(results)
  return "" if results.empty?

  sections = results.map do |hit|
    format("File: %s\n%s", hit[:file], hit[:content])
  end
  sections.join("\n\n")
end
|
|
322
|
+
|
|
323
|
+
# Converts code search hits into the snippet shape passed to the answer
# prompt (:file is renamed to :file_path).
#
# @param results [Array<Hash>] hits with :file and :content
# @return [Array<Hash>] hashes with :file_path and :content
def to_code_snippets(results)
  results.map do |hit|
    { file_path: hit[:file], content: hit[:content] }
  end
end
|
|
326
|
+
|
|
327
|
+
# Maps an exception to a message that is safe to show to the end user.
#
# PG errors are recognised on the exception itself or on its `cause`, so a
# driver error wrapped by another exception class still gets the
# PG-specific wording. The message keyword checks (timeout, 401/unauthorized,
# 429/rate limit) are case-insensitive.
#
# @param exception [Exception]
# @return [String] user-facing message; never contains the raw error text
def friendly_error_message(exception)
  pg_class = [exception, exception.cause].compact
                                         .map { |err| err.class.name.to_s }
                                         .find { |name| name.start_with?("PG::") }

  if pg_class
    return case pg_class
           when "PG::ConnectionBad"
             "I'm having trouble connecting right now. Please try again in a moment."
           when "PG::QueryCanceled"
             "That question required too much processing. Could you try a more specific question?"
           else
             "I couldn't find the information needed to answer that. Could you rephrase your question?"
           end
  end

  msg = exception.message.to_s.downcase
  if msg.include?("timeout")
    "That took too long to process. Try asking a more specific question."
  elsif msg.include?("401") || msg.include?("unauthorized")
    "I'm having trouble reaching the AI service. Please check the API key configuration."
  elsif msg.include?("429") || msg.include?("rate limit")
    "The AI service is busy right now. Please try again in a moment."
  else
    "Something went wrong while processing your question. Please try again."
  end
end
|
|
350
|
+
|
|
351
|
+
# Tells whether a database error message is about a missing or wrong column.
# Only these errors are given to the programmatic column-fix strategy.
#
# @param error_message [#to_s]
# @return [Boolean]
def column_error?(error_message)
  text = error_message.to_s
  return true if text.include?("UndefinedColumn")

  /column .* does not exist/i.match?(text)
end
|
|
357
|
+
|
|
358
|
+
# Attempts to repair an undefined-column error by swapping in a column that
# does exist on the referenced table.
#
# Works only for qualified references ("qualifier.column") in the error
# message. The qualifier is resolved against the SQL as:
#   * a table alias:      FROM job_types jt / JOIN job_types AS jt
#   * the table's own name: FROM job_types ... job_types.name
# Columns are read from the schema line "TABLE <name> (col TYPE, ...)".
#
# @param error_message [String] database error text
# @param sql [String] the statement that failed
# @param schema [String] schema text, one "TABLE ..." line per table
# @return [String, nil] corrected SQL, or nil when no fix could be determined
def try_fix_column(error_message, sql, schema)
  return nil unless error_message.include?("UndefinedColumn") || error_message.include?("does not exist")

  # Extract "qualifier.column" from e.g.: column jt.name does not exist
  col_match = error_message.match(/column\s+"?(\w+)\.(\w+)"?\s+does not exist/i)
  return nil unless col_match

  qualifier = col_match[1]
  bad_col = col_match[2]
  qualifier_pattern = Regexp.escape(qualifier)

  # Resolve the qualifier to a real table name; the AS keyword is optional.
  alias_match = sql.match(/(?:FROM|JOIN)\s+"?(\w+)"?\s+(?:AS\s+)?#{qualifier_pattern}\b/i)
  real_table =
    if alias_match
      alias_match[1]
    elsif sql.match?(/(?:FROM|JOIN)\s+"?#{qualifier_pattern}"?(?!\w)/i)
      # The column was qualified with the table name itself.
      qualifier
    end
  return nil unless real_table

  # Extract columns for this table from the schema.
  table_line = schema.split("\n").find { |l| l.start_with?("TABLE #{real_table} ") || l.start_with?("TABLE #{real_table}\t") }
  return nil unless table_line

  cols_in_parens = table_line.match(/\((.+)\)/)
  return nil unless cols_in_parens

  columns = cols_in_parens[1].scan(/(\w+)\s+\w+/).flatten

  # For a hallucinated "name" column, prefer a known naming column.
  replacement = nil
  if %w[name names].include?(bad_col.downcase)
    replacement = (columns & %w[title label first_name display_name description]).first
  end
  # Fallback: fuzzy match (column containing the bad name or vice versa).
  replacement ||= columns.find { |c| c.include?(bad_col) || bad_col.include?(c) }

  return nil unless replacement

  # Only qualified uses are rewritten; unqualified references to the bad
  # column (e.g. in ORDER BY) are left untouched.
  fixed = sql.gsub(/\b#{qualifier_pattern}\.#{Regexp.escape(bad_col)}\b/i, "#{qualifier}.#{replacement}")
  fixed == sql ? nil : fixed
end
|
|
398
|
+
|
|
399
|
+
# Builds a hint for the LLM retry from an undefined-column error.
#
# Reads every "TABLE <name> (col TYPE, ...)" line of the schema, keeps the
# tables that do NOT contain the offending column and lists up to 15 columns
# for each of the first three, e.g.:
#   "HINT: Column 'name' does not exist. Table 'job_types' columns include: id, title.\n\n"
#
# @param error_message [String] database error text
# @param schema [String] schema text
# @return [String] the hint (ending in a blank line), or "" when none applies
def build_column_hint(error_message, schema)
  return "" unless error_message.include?("UndefinedColumn") || error_message.include?("does not exist")

  # Matches "column X does not exist" as well as "column X.Y does not exist".
  found = error_message.match(/column[:\s]+"?(\w+\.)?(\w+)"?\s+(does not exist|of relation)/i)
  return "" unless found

  missing = found[2]

  columns_by_table = schema.split("\n").each_with_object({}) do |line, acc|
    next unless line.start_with?("TABLE ")

    table_name = line.match(/^TABLE (\S+)/)[1]
    listing = line.match(/\((.+)\)/)
    acc[table_name] = listing[1].scan(/(\w+)\s+\w+/).flatten if listing
  end

  suggestions = columns_by_table
                .reject { |_table, cols| cols.include?(missing) }
                .map { |table, cols| "Table '#{table}' columns include: #{cols.first(15).join(', ')}" }
  return "" if suggestions.empty?

  "HINT: Column '#{missing}' does not exist. #{suggestions.first(3).join(". ")}.\n\n"
end
|
|
433
|
+
|
|
434
|
+
# Writes an exception to the Rails logger when one is available: a
# "[SqlChatbot] Class: message" line, then the first five backtrace frames.
# Does nothing outside Rails or when no logger is configured.
#
# @param exception [Exception]
def log_error(exception)
  return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

  logger = Rails.logger
  logger.error("[SqlChatbot] #{exception.class}: #{exception.message}")

  # Exceptions that were never raised have no backtrace.
  trace = exception.backtrace
  logger.error(trace.first(5).join("\n")) if trace
end
|
|
440
|
+
|
|
441
|
+
# Attempts the grammar-first path.
#
# Returns :handled when the grammar matched, its SQL validated, executed and
# passed the sanity check, and an answer was emitted. Returns :miss when the
# grammar is unavailable, disabled, missed, or failed before any answer text
# was sent — the caller should then fall through to the LLM path.
#
# If a failure happens AFTER answer tokens have started streaming, the error
# is re-raised instead of returning :miss: falling back at that point would
# append a second answer to a partially delivered one.
#
# Emits "grammar_fallback" (with a reason) on a miss, and "executing",
# "grammar_matched", "sql" and "token" events on the way to :handled.
#
# @param yielder [#yield] sink for event hashes
# @param question [String]
# @param history [Array]
# @return [Symbol] :handled or :miss
def try_grammar_path(yielder, question, history)
  # Set as soon as the first answer token is about to be emitted.
  answer_started = false

  registry = defined?(SqlChatbot) && SqlChatbot.respond_to?(:registry) ? SqlChatbot.registry : nil
  config = defined?(SqlChatbot) && SqlChatbot.respond_to?(:config) ? SqlChatbot.config : nil

  return :miss unless registry
  return :miss if config && config.respond_to?(:grammar_enabled) && config.grammar_enabled == false

  call_llm = ->(messages) { @llm.call(messages, json_mode: true) }
  threshold = config.respond_to?(:grammar_confidence_threshold) ? config.grammar_confidence_threshold : 0.7
  miss_log = resolved_miss_log_path(config)

  pipeline = GrammarPipeline.new(
    registry: registry,
    call_llm: call_llm,
    confidence_threshold: threshold,
    miss_log_path: miss_log
  )

  result = pipeline.try(question: question, history: history)

  unless result[:ok]
    yielder.yield({ type: "grammar_fallback", data: { reason: result[:reason] } })
    return :miss
  end

  sql = result[:sql]

  # Validate BEFORE emitting grammar_matched — if the SQL is bad, fall through
  # to the LLM path rather than showing the user a broken query.
  validation = SqlExecutor.validate_sql(sql)
  unless validation[:valid]
    record_grammar_miss(miss_log, {
      question: question,
      reason: "grammar_validation_failed: #{validation[:reason]}",
      extracted: result[:intent],
      resulting_sql: sql,
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "grammar_validation_failed" } })
    return :miss
  end

  # Execute BEFORE emitting grammar_matched, for the same reason.
  yielder.yield({ type: "executing" })
  begin
    db_result = SqlExecutor.execute_sql(validation[:sql])
  rescue => e
    log_error(e)
    record_grammar_miss(miss_log, {
      question: question,
      reason: "grammar_execution_error: #{e.class}: #{e.message.to_s.lines.first&.strip}",
      extracted: result[:intent],
      resulting_sql: validation[:sql],
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "grammar_execution_error" } })
    return :miss
  end

  # Sanity check: when the entity is known to the registry, let SanityCheck
  # compare the result against it to catch plausible-but-wrong answers
  # before the user sees them.
  intent = result[:intent] || {}
  primitive = (intent[:primitive] || intent["primitive"]).to_s
  entity_name = registry.aliases[intent[:entity] || intent["entity"]] || intent[:entity] || intent["entity"]
  sanity_entity = entity_name && registry.entities[entity_name.to_s]
  sanity = if sanity_entity
             SqlChatbot::Grammar::SanityCheck.check_count(primitive, sanity_entity, db_result[:rows])
           else
             { ok: true }
           end

  unless sanity[:ok]
    record_grammar_miss(miss_log, {
      question: question,
      reason: sanity[:reason],
      extracted: result[:intent],
      resulting_sql: validation[:sql],
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "count_mismatch" } })
    return :miss
  end

  # Only now — after validation, execution and the sanity check — commit to
  # the grammar path.
  yielder.yield({ type: "grammar_matched", data: {} })
  yielder.yield({ type: "sql", query: validation[:sql], explanation: "grammar" })

  # Programmatic renderers bypass the answer LLM entirely; the renderer
  # dispatches on the primitive.
  rank_field = intent[:rank_field] || intent["rank_field"]
  agg_field = intent[:field] || intent["field"]
  which = intent[:which] || intent["which"]
  render = SqlChatbot::Grammar::ProgrammaticRenderer.try_render(
    primitive,
    sanity_entity&.display_label,
    db_result[:rows],
    rank_field: rank_field,
    field: agg_field,
    which: which,
  )
  if render[:ok]
    answer_started = true
    yielder.yield({ type: "token", content: render[:text] })
    return :handled
  end

  # Empty-result guard: empty-shaped results (zero counts, NULL aggregates)
  # that the primitive's renderer declined are rendered programmatically
  # instead of being sent to the answer LLM.
  guarded = SqlChatbot::Grammar::ProgrammaticRenderer.render_empty_for_llm_sql(
    question, validation[:sql], db_result[:rows]
  )
  if guarded
    answer_started = true
    yielder.yield({ type: "token", content: guarded })
    return :handled
  end

  answer_messages = Prompts::Answer.build_messages(
    question: question,
    type: "data",
    sql_result: db_result[:rows],
    sql_query: validation[:sql],
    history: history
  )

  @llm.stream(answer_messages) do |chunk|
    answer_started = true
    yielder.yield({ type: "token", content: chunk })
  end

  :handled
rescue => e
  log_error(e)
  # A partially streamed answer must not be followed by a second one from the
  # LLM path; surface the failure to the caller instead.
  raise if answer_started

  # Nothing was answered yet — fall through to the LLM path.
  :miss
end

# Best-effort miss logging: a logging failure must never break the request.
def record_grammar_miss(path, entry)
  SqlChatbot::Grammar::MissLogger.log(path, entry)
rescue StandardError
  nil
end
|
|
582
|
+
|
|
583
|
+
# Picks the file that grammar misses are logged to, in order of preference:
# the configured grammar_miss_log_path, then log/grammar-misses.ndjson under
# Rails.root, then a file in /tmp.
#
# @param config [Object, nil] configuration object, if any
# @return [String, Object] the configured value as-is, or a path String
def resolved_miss_log_path(config)
  configured = config && config.respond_to?(:grammar_miss_log_path) && config.grammar_miss_log_path
  return configured if configured

  if defined?(Rails) && Rails.respond_to?(:root) && Rails.root
    return Rails.root.join("log", "grammar-misses.ndjson").to_s
  end

  "/tmp/grammar-misses.ndjson"
end
|
|
592
|
+
|
|
593
|
+
# Builds the markdown list of application pages shown to the answer LLM.
#
# Routes are merged by path from three sources, later sources overriding
# earlier ones:
#   1. code indexer routes (lowest priority, no labels),
#   2. manifest routes from the widget (carry labels),
#   3. route introspector data (highest priority).
# Only GET routes are listed. A route without a label falls back to its
# capitalised last path segment, or "Page".
#
# The manifest is read with string OR symbol keys, matching set_manifest,
# which accepts the version under either key style. The GET filter ignores
# case and accepts symbols, so "get" and :get count as GET.
#
# @return [String] the listing, or "No application routes detected." when no
#   source provided any route
def build_route_list
  routes_by_path = {}
  # Reads a manifest field regardless of whether keys are strings or symbols.
  read = ->(hash, key) { hash[key] || hash[key.to_sym] }

  # 1. Code indexer routes (lowest priority): first entry per path wins.
  @code_indexer.get_routes.each do |r|
    routes_by_path[r[:path]] ||= { path: r[:path], method: r[:method], label: nil, source: "code_indexer" }
  end

  # 2. Manifest routes from the widget (higher priority, has labels).
  manifest_routes = @manifest && read.call(@manifest, "routes")
  (manifest_routes || []).each do |r|
    path = read.call(r, "path")
    routes_by_path[path] = {
      path: path,
      method: read.call(r, "method") || "GET",
      label: read.call(r, "label"),
      parentPath: read.call(r, "parentPath"),
      source: "manifest"
    }
  end

  # 3. RouteIntrospector routes (highest priority).
  if @route_introspector_data
    @route_introspector_data.each do |r|
      routes_by_path[r[:path]] = r.merge(source: "introspector")
    end
  end

  return "No application routes detected." if routes_by_path.empty?

  lines = routes_by_path.values
                        .select { |r| r[:method].to_s.upcase == "GET" }
                        .map do |r|
    parent_note = r[:parentPath] ? " (under #{r[:parentPath]})" : ""
    label = r[:label] || r[:path].split("/").last&.capitalize || "Page"
    "- #{r[:path]} \u2014 #{label}#{parent_note}"
  end

  "## Available Application Pages\n#{lines.join("\n")}"
end
|
|
633
|
+
end
|
|
634
|
+
end
|
|
635
|
+
end
|