sql-chatbot-rails 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +20 -0
  4. data/app/controllers/sql_chatbot/chatbot_controller.rb +158 -0
  5. data/config/routes.rb +11 -0
  6. data/lib/generators/sql_chatbot/install_generator.rb +25 -0
  7. data/lib/generators/sql_chatbot/templates/initializer.rb +22 -0
  8. data/lib/sql_chatbot/auth/cors.rb +35 -0
  9. data/lib/sql_chatbot/auth/jwt.rb +34 -0
  10. data/lib/sql_chatbot/configuration.rb +58 -0
  11. data/lib/sql_chatbot/engine.rb +23 -0
  12. data/lib/sql_chatbot/grammar/count_renderer.rb +113 -0
  13. data/lib/sql_chatbot/grammar/entity_candidates.rb +210 -0
  14. data/lib/sql_chatbot/grammar/intent_extractor.rb +191 -0
  15. data/lib/sql_chatbot/grammar/list_renderer.rb +50 -0
  16. data/lib/sql_chatbot/grammar/miss_logger.rb +17 -0
  17. data/lib/sql_chatbot/grammar/modifiers.rb +145 -0
  18. data/lib/sql_chatbot/grammar/primitives.rb +69 -0
  19. data/lib/sql_chatbot/grammar/programmatic_renderer.rb +258 -0
  20. data/lib/sql_chatbot/grammar/registry.rb +66 -0
  21. data/lib/sql_chatbot/grammar/sanity_check.rb +37 -0
  22. data/lib/sql_chatbot/grammar/template_compiler.rb +179 -0
  23. data/lib/sql_chatbot/llm/client.rb +87 -0
  24. data/lib/sql_chatbot/prompts/answer.rb +157 -0
  25. data/lib/sql_chatbot/prompts/classify.rb +59 -0
  26. data/lib/sql_chatbot/prompts/generate_sql.rb +88 -0
  27. data/lib/sql_chatbot/services/code_indexer.rb +337 -0
  28. data/lib/sql_chatbot/services/grammar_pipeline.rb +45 -0
  29. data/lib/sql_chatbot/services/model_introspector.rb +152 -0
  30. data/lib/sql_chatbot/services/orchestrator.rb +635 -0
  31. data/lib/sql_chatbot/services/registry_builder.rb +385 -0
  32. data/lib/sql_chatbot/services/route_introspector.rb +118 -0
  33. data/lib/sql_chatbot/services/schema_service.rb +884 -0
  34. data/lib/sql_chatbot/services/sql_executor.rb +81 -0
  35. data/lib/sql_chatbot/version.rb +5 -0
  36. data/lib/sql_chatbot_rails.rb +91 -0
  37. data/vendor/assets/widget.js +53 -0
  38. metadata +180 -0
@@ -0,0 +1,635 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "sql_chatbot/prompts/classify"
5
+ require "sql_chatbot/prompts/generate_sql"
6
+ require "sql_chatbot/prompts/answer"
7
+ require "sql_chatbot/services/sql_executor"
8
+ require "sql_chatbot/services/grammar_pipeline"
9
+ require "sql_chatbot/grammar/miss_logger"
10
+ require "sql_chatbot/grammar/sanity_check"
11
+ require "sql_chatbot/grammar/list_renderer"
12
+ require "sql_chatbot/grammar/count_renderer"
13
+ require "sql_chatbot/grammar/programmatic_renderer"
14
+
15
+ module SqlChatbot
16
+ module Services
17
+ class Orchestrator
18
+ VALID_TYPES = %w[data data_with_code code navigation guidance greeting unsafe].freeze
19
+
20
# Wires the orchestrator to its collaborators.
#
# llm_client              - used via #call (JSON-mode requests) and #stream.
# schema_service          - queried for table names, lookup hints, schema
#                           selection and enum context.
# code_indexer            - queried via #search and #get_routes.
# route_introspector_data - optional enumerable of route hashes (read with
#                           a :path key); nil when unavailable.
#
# The widget manifest starts out unset; see #set_manifest.
def initialize(llm_client:, schema_service:, code_indexer:, route_introspector_data: nil)
  @manifest = nil
  @route_introspector_data = route_introspector_data
  @code_indexer = code_indexer
  @schema = schema_service
  @llm = llm_client
end
27
+
28
# Stores the widget-supplied manifest if it is usable.
#
# The manifest must be Hash-like (respond to #key?) and declare version 1
# under either the "version" or :version key. Anything else is reported on
# stderr via Kernel#warn and ignored, leaving a previously stored manifest
# untouched.
#
# Returns the stored manifest, or nil when the input was rejected.
def set_manifest(manifest)
  # A nil / non-Hash manifest used to raise NoMethodError on the version
  # lookup below; treat it like any other unsupported manifest instead.
  unless manifest.respond_to?(:key?)
    warn "[SqlChatbot] Ignoring manifest: expected a Hash-like object, got #{manifest.class}"
    return
  end

  version = manifest["version"] || manifest[:version]
  unless version == 1
    warn "[SqlChatbot] Unsupported manifest version: #{version}"
    return
  end

  @manifest = manifest
end
36
+
37
# Public entry point for the merged route listing.
# Simply exposes the result of the private #build_route_list.
def route_list
  build_route_list
end
40
+
41
# Answers one question as a lazily evaluated stream of event hashes.
#
# Nothing runs until the returned Enumerator is consumed. Every event is a
# Hash with a :type key, emitted in this order:
#   "classifying", "classified", then whatever the selected route handler
#   emits ("sql", "executing", "token", "grammar_matched",
#   "grammar_fallback", "error"), and finally "done".
# A StandardError raised anywhere in the flow is logged and surfaced as one
# "error" event carrying a user-friendly message; "done" is not emitted in
# that case.
#
# question     - the user's question text.
# page_context - optional context forwarded to the prompt builders.
# history      - prior conversation turns forwarded to the prompt builders.
def handle_question(question:, page_context: nil, history: [])
  Enumerator.new do |yielder|
    begin
      # --- Step 1: classify the question ---
      yielder.yield({ type: "classifying" })

      classify_messages = Prompts::Classify.build_messages(
        question: question,
        schema_summary: @schema.table_names,
        page_context: page_context,
        history: history
      )
      classification = parse_classification(@llm.call(classify_messages, json_mode: true))
      question_type = classification[:type]

      yielder.yield({
        type: "classified",
        questionType: question_type,
        confidence: classification[:confidence]
      })

      # --- Step 2: dispatch to the handler for that question type ---
      if %w[data data_with_code].include?(question_type)
        handle_data_with_code(yielder, question, classification, page_context, history)
      elsif question_type == "code"
        handle_code(yielder, question, classification, history)
      elsif %w[navigation guidance].include?(question_type)
        handle_navigation(yielder, question, question_type, page_context, history)
      elsif question_type == "greeting"
        handle_greeting(yielder, question, history)
      elsif question_type == "unsafe"
        handle_unsafe(yielder)
      end

      yielder.yield({ type: "done" })
    rescue => e
      log_error(e)
      yielder.yield({ type: "error", message: friendly_error_message(e) })
    end
  end
end
87
+
88
+ private
89
+
90
+ # ============================================================
91
+ # Route handlers
92
+ # ============================================================
93
+
94
# Handles "data" / "data_with_code" questions.
#
# Flow:
#   1. Try the grammar-first path; stop if it fully handled the question.
#   2. Gather code context, lookup hints and the relevant schema slice.
#   3. Ask the LLM for SQL, validate it, execute it.
#   4. On ActiveRecord::StatementInvalid, try the recovery strategies in
#      #recover_from_sql_error; if none works, emit a graceful message
#      (raw database errors are never shown to the user).
#   5. Render empty-shaped results programmatically, otherwise stream the
#      answer from the LLM.
#
# Events are pushed to +yielder+; the return value is not meaningful.
def handle_data_with_code(yielder, question, classification, page_context, history)
  # --- Grammar-first path (before LLM SQL generation) ---
  return if try_grammar_path(yielder, question, history) == :handled

  # Search code index for context
  search_terms = classification[:searchTerms] || []
  code_results = search_terms.empty? ? [] : @code_indexer.search(search_terms)
  code_context = format_code_context(code_results)
  code_snippets = to_code_snippets(code_results)

  question_type = code_context.empty? ? "data" : "data_with_code"

  # Lookup hints matching the question, and only the schema tables that
  # the search terms select.
  lookup_hints = @schema.find_lookup_hints(question)
  selected_schema = @schema.select_schema(search_terms)

  gen_messages = Prompts::GenerateSql.build_messages(
    question: question,
    schema: selected_schema,
    code_context: code_context.empty? ? nil : code_context,
    lookup_hints: lookup_hints.empty? ? nil : lookup_hints,
    history: history
  )
  raw_sql = @llm.call(gen_messages, json_mode: true)
  parsed = parse_sql_generation(raw_sql)

  if parsed[:sql].empty?
    yielder.yield({ type: "error", message: "Failed to generate SQL" })
    return
  end

  yielder.yield({ type: "sql", query: parsed[:sql], explanation: parsed[:explanation] })

  validation = SqlExecutor.validate_sql(parsed[:sql])
  unless validation[:valid]
    yielder.yield({ type: "error", message: "SQL validation failed: #{validation[:reason]}" })
    return
  end

  yielder.yield({ type: "executing" })

  graceful = { type: "token", content: "I couldn't answer that one — could you rephrase or be more specific?" }

  # Track the SQL that actually produced +result+: after a recovery it is
  # the corrected query, not the one that failed, and everything downstream
  # (empty-result guard, answer prompt) must describe the executed query.
  executed_sql = validation[:sql]
  begin
    result = SqlExecutor.execute_sql(executed_sql)
  rescue ActiveRecord::StatementInvalid => e
    log_error(e)
    recovered = recover_from_sql_error(yielder, e, validation[:sql], selected_schema, gen_messages, raw_sql)
    unless recovered
      yielder.yield(graceful)
      return
    end
    executed_sql, result = recovered
  rescue => e
    log_error(e)
    yielder.yield(graceful)
    return
  end

  # Bug D guard: bypass the answer LLM for empty results, which it
  # mis-renders as "No matching records found." even for valid 0-counts
  # and NULL aggregates. The guard returns nil when there is data.
  guarded = SqlChatbot::Grammar::ProgrammaticRenderer.render_empty_for_llm_sql(
    question, executed_sql, result[:rows]
  )
  if guarded
    yielder.yield({ type: "token", content: guarded })
    return
  end

  # Enum context from the selected schema, used for answer translation
  enum_context = @schema.extract_enum_context(selected_schema)

  answer_messages = Prompts::Answer.build_messages(
    question: question,
    type: question_type,
    sql_result: result[:rows],
    sql_query: executed_sql,
    code_snippets: code_snippets.empty? ? nil : code_snippets,
    page_context: page_context,
    history: history,
    enum_context: enum_context.empty? ? nil : enum_context
  )

  @llm.stream(answer_messages) do |chunk|
    yielder.yield({ type: "token", content: chunk })
  end
end

# Runs the recovery strategies for a failed SQL execution, in order:
# programmatic column fix (column errors only), then one LLM retry.
# Returns [executed_sql, result] from the first strategy that yields a
# truthy result, or nil when every strategy failed.
def recover_from_sql_error(yielder, error, failed_sql, schema, gen_messages, raw_sql)
  retry_with_column_fix(yielder, error, failed_sql, schema) ||
    retry_with_llm_correction(yielder, error, schema, gen_messages, raw_sql)
end

# Strategy 1: rewrite a wrong column name without involving the LLM.
# Returns [sql, result] on success, nil otherwise. Failures are logged and
# swallowed so that the next strategy can run.
def retry_with_column_fix(yielder, error, failed_sql, schema)
  return nil unless column_error?(error.message)

  fixed_sql = try_fix_column(error.message, failed_sql, schema)
  return nil unless fixed_sql

  fixed_validation = SqlExecutor.validate_sql(fixed_sql)
  return nil unless fixed_validation[:valid]

  yielder.yield({ type: "sql", query: fixed_sql, explanation: "Auto-corrected column name" })
  result = SqlExecutor.execute_sql(fixed_validation[:sql])
  result ? [fixed_validation[:sql], result] : nil
rescue => fix_error
  log_error(fix_error)
  nil
end

# Strategy 2: show the LLM its own SQL plus the database error and ask for
# a corrected query (applies to every statement error, not just column
# errors). Returns [sql, result] on success, nil otherwise. Failures are
# logged and swallowed so the caller can fall back to the graceful message.
def retry_with_llm_correction(yielder, error, schema, gen_messages, raw_sql)
  error_hint = column_error?(error.message) ? build_column_hint(error.message, schema) : ""
  retry_messages = gen_messages + [
    { role: "assistant", content: raw_sql },
    { role: "user", content: "The SQL query failed with this error:\n#{error.message}\n\n#{error_hint}Please fix the SQL." }
  ]

  retry_parsed = parse_sql_generation(@llm.call(retry_messages, json_mode: true))
  return nil if retry_parsed[:sql].empty?

  retry_validation = SqlExecutor.validate_sql(retry_parsed[:sql])
  return nil unless retry_validation[:valid]

  yielder.yield({ type: "sql", query: retry_parsed[:sql], explanation: "Corrected: #{retry_parsed[:explanation]}" })
  result = SqlExecutor.execute_sql(retry_validation[:sql])
  result ? [retry_validation[:sql], result] : nil
rescue => retry_error
  log_error(retry_error)
  nil
end
231
+
232
# Handles "code" questions: looks up matching snippets in the code index
# and streams an LLM answer grounded in them. When the classification has
# no search terms, or the index returns nothing, a fixed "couldn't find"
# token is emitted and the LLM is not called.
def handle_code(yielder, question, classification, history)
  terms = classification[:searchTerms] || []
  hits = []
  hits = @code_indexer.search(terms) unless terms.empty?

  if hits.empty?
    yielder.yield({ type: "token", content: "I couldn't find relevant code for that question." })
    return
  end

  prompt = Prompts::Answer.build_messages(
    question: question,
    type: "code",
    code_snippets: to_code_snippets(hits),
    history: history
  )

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
254
+
255
# Handles "navigation" and "guidance" questions: builds the answer prompt
# from the merged route listing and streams the LLM answer as "token"
# events. +type+ is passed through to the prompt builder unchanged.
def handle_navigation(yielder, question, type, page_context, history)
  routes = build_route_list
  prompt = Prompts::Answer.build_messages(
    question: question,
    type: type,
    page_context: page_context,
    route_list: routes,
    history: history
  )

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
270
+
271
# Handles "greeting" questions: no data or code lookup, just a streamed
# LLM reply built from the question and the conversation history.
def handle_greeting(yielder, question, history)
  prompt = Prompts::Answer.build_messages(question: question, type: "greeting", history: history)

  @llm.stream(prompt) { |piece| yielder.yield({ type: "token", content: piece }) }
end
282
+
283
# Handles questions classified as "unsafe": emits a fixed refusal token
# without calling the LLM or the database.
def handle_unsafe(yielder)
  refusal = { type: "token", content: "I can't help with that request." }
  yielder.yield(refusal)
end
286
+
287
+ # ============================================================
288
+ # Parsing helpers
289
+ # ============================================================
290
+
291
# Parses the classifier's JSON reply into a normalized hash:
#   { type: String, confidence: Numeric, searchTerms: Array }
#
# Unknown or missing types become "data", a non-numeric confidence becomes
# 0.5 and a non-array searchTerms becomes []. Anything that is not a JSON
# object (invalid JSON, nil/empty input, a JSON array or scalar) yields the
# default classification instead of raising.
def parse_classification(raw)
  fallback = { type: "data", confidence: 0.5, searchTerms: [] }

  # nil.to_s is "", which JSON.parse rejects with ParserError -> fallback.
  parsed = JSON.parse(raw.to_s, symbolize_names: true)
  # Valid JSON that is not an object (e.g. "[]") cannot be indexed by symbol.
  return fallback unless parsed.is_a?(Hash)

  type = parsed[:type]
  type = "data" unless VALID_TYPES.include?(type)
  {
    type: type,
    confidence: parsed[:confidence].is_a?(Numeric) ? parsed[:confidence] : 0.5,
    searchTerms: parsed[:searchTerms].is_a?(Array) ? parsed[:searchTerms] : []
  }
rescue JSON::ParserError
  fallback
end
303
+
304
# Parses the SQL generator's JSON reply into { sql: String, explanation: String }.
#
# :sql is "" whenever no usable query is present: invalid JSON, nil/empty
# input, a reply that is not a JSON object, a non-string "sql" value, or a
# blank / whitespace-only string. Callers treat an empty :sql as
# "generation failed".
def parse_sql_generation(raw)
  empty = { sql: "", explanation: "" }

  # nil.to_s is "", which JSON.parse rejects with ParserError -> empty.
  parsed = JSON.parse(raw.to_s, symbolize_names: true)
  # Valid JSON that is not an object (e.g. an array) cannot be indexed by symbol.
  return empty unless parsed.is_a?(Hash)

  sql = parsed[:sql]
  # Whitespace-only SQL is as useless as an empty string.
  sql = "" unless sql.is_a?(String) && !sql.strip.empty?
  { sql: sql, explanation: (parsed[:explanation] || "").to_s }
rescue JSON::ParserError
  empty
end
312
+
313
+ # ============================================================
314
+ # Formatting helpers
315
+ # ============================================================
316
+
317
# Renders code search results as one text block for the SQL prompt:
# each result becomes "File: <file>\n<content>", separated by blank lines.
# Returns "" when there are no results.
def format_code_context(results)
  sections = results.map { |hit| "File: #{hit[:file]}\n#{hit[:content]}" }
  sections.empty? ? "" : sections.join("\n\n")
end
322
+
323
# Converts code search results (:file / :content) into the snippet shape
# handed to the answer prompt (:file_path / :content).
def to_code_snippets(results)
  results.map do |hit|
    { file_path: hit[:file], content: hit[:content] }
  end
end
326
+
327
# Maps an exception to a message that is safe to show to end users.
#
# Exceptions whose class name starts with "PG::" are mapped by class; all
# others are mapped by substrings of the exception message, checked in this
# order: timeout, authorization (401), rate limiting (429). Anything else
# gets a generic message.
def friendly_error_message(exception)
  class_name = exception.class.name.to_s
  text = exception.message.to_s

  if class_name.start_with?("PG::")
    return "I'm having trouble connecting right now. Please try again in a moment." if class_name == "PG::ConnectionBad"
    return "That question required too much processing. Could you try a more specific question?" if class_name == "PG::QueryCanceled"

    return "I couldn't find the information needed to answer that. Could you rephrase your question?"
  end

  mentions = ->(*needles) { needles.any? { |needle| text.include?(needle) } }

  return "That took too long to process. Try asking a more specific question." if mentions.call("timeout", "Timeout")
  return "I'm having trouble reaching the AI service. Please check the API key configuration." if mentions.call("401", "Unauthorized")
  return "The AI service is busy right now. Please try again in a moment." if mentions.call("429", "rate limit")

  "Something went wrong while processing your question. Please try again."
end
350
+
351
# Tells whether a database error message is about a missing/wrong column:
# it either mentions "UndefinedColumn" or matches "column ... does not
# exist" (case-insensitive). Only such errors go through the programmatic
# column-fix strategy. Accepts nil (treated as an empty message).
def column_error?(error_message)
  text = error_message.to_s
  return true if text.include?("UndefinedColumn")

  text.match?(/column .* does not exist/i)
end
357
+
358
# Attempts to repair an undefined-column error without calling the LLM.
#
# The error must name a qualified column ("column jt.name does not exist").
# The qualifier is resolved to a table through the SQL's FROM/JOIN clauses,
# the table's columns are read from the schema text (one
# "TABLE name (col TYPE, ...)" line per table), and a replacement column is
# chosen. Every qualified occurrence of the bad column is rewritten;
# unqualified uses are left untouched.
#
# Returns the corrected SQL, or nil when no fix could be determined.
def try_fix_column(error_message, sql, schema)
  message = error_message.to_s
  return nil unless message.include?("UndefinedColumn") || message.include?("does not exist")

  # Extract "qualifier.column" from the error: column jt.name does not exist
  col_match = message.match(/column\s+"?(\w+)\.(\w+)"?\s+does not exist/i)
  return nil unless col_match

  qualifier = col_match[1]
  bad_col = col_match[2]

  real_table = resolve_table_for_qualifier(sql, qualifier)
  return nil unless real_table

  # Columns of that table, taken from its "TABLE ..." line in the schema
  table_line = schema.split("\n").find do |line|
    line.start_with?("TABLE #{real_table} ", "TABLE #{real_table}\t")
  end
  return nil unless table_line

  cols_in_parens = table_line.match(/\((.+)\)/)
  return nil unless cols_in_parens
  columns = cols_in_parens[1].scan(/(\w+)\s+\w+/).flatten

  # A hallucinated "name" column most often stands for one of these; the
  # first one present in the table (in table column order) wins.
  replacement = nil
  if %w[name names].include?(bad_col.downcase)
    replacement = (columns & %w[title label first_name display_name description]).first
  end
  # Fallback: fuzzy match (column containing the bad name or vice versa).
  # NOTE(review): very short columns such as "id" match many bad names here.
  replacement ||= columns.find { |c| c.include?(bad_col) || bad_col.include?(c) }
  return nil unless replacement

  fixed = sql.gsub(/\b#{Regexp.escape(qualifier)}\.#{Regexp.escape(bad_col)}\b/i, "#{qualifier}.#{replacement}")
  fixed == sql ? nil : fixed
end

# Resolves the qualifier used in a column reference to a table name:
# "FROM job_types jt" and "JOIN job_types AS jt" map jt -> job_types, and a
# qualifier that is itself a FROM/JOIN table ("FROM users") maps to itself.
# Returns nil when the qualifier does not appear in a FROM/JOIN clause.
def resolve_table_for_qualifier(sql, qualifier)
  escaped = Regexp.escape(qualifier)

  aliased = sql.match(/\b(?:FROM|JOIN)\s+(\w+)\s+(?:AS\s+)?#{escaped}\b/i)
  return aliased[1] if aliased

  qualifier if sql.match?(/\b(?:FROM|JOIN)\s+#{escaped}\b/i)
end
398
+
399
# Builds a hint for the LLM retry prompt from an undefined-column error and
# the schema text, e.g.
#   "HINT: Column 'name' does not exist. Table 'job_types' columns include: id, title.\n\n"
#
# The schema is expected to hold one "TABLE name (col TYPE, ...)" line per
# table. Up to three tables that lack the offending column are listed, each
# with at most 15 of its columns. Returns "" when the error is not a column
# error, the column cannot be extracted, or no table qualifies.
def build_column_hint(error_message, schema)
  return "" unless error_message.include?("UndefinedColumn") || error_message.include?("does not exist")

  # Matches "column X does not exist", "column X.Y does not exist" and
  # "column X of relation ..."; the second capture is the bare column name.
  found = error_message.match(/column[:\s]+"?(\w+\.)?(\w+)"?\s+(does not exist|of relation)/i)
  return "" unless found

  missing = found[2]

  columns_by_table = schema.split("\n").each_with_object({}) do |line, acc|
    next unless line.start_with?("TABLE ")

    table = line.match(/^TABLE (\S+)/)[1]
    listed = line.match(/\((.+)\)/)
    acc[table] = listed[1].scan(/(\w+)\s+\w+/).flatten if listed
  end

  # Only tables that do NOT have the column are worth showing.
  hints = columns_by_table
    .reject { |_table, cols| cols.include?(missing) }
    .map { |table, cols| "Table '#{table}' columns include: #{cols.first(15).join(', ')}" }
  return "" if hints.empty?

  "HINT: Column '#{missing}' does not exist. #{hints.first(3).join(". ")}.\n\n"
end
433
+
434
# Writes an exception to the Rails logger: one line with class and message,
# then (when a backtrace exists) its first five frames joined by newlines.
# Does nothing when Rails or its logger is not available.
def log_error(exception)
  return unless defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger

  Rails.logger.error("[SqlChatbot] #{exception.class}: #{exception.message}")

  trace = exception.backtrace
  Rails.logger.error(trace.first(5).join("\n")) if trace
end
440
+
441
# Attempts the grammar-first path: turn the question into SQL through the
# grammar pipeline, validate and execute it, sanity-check the result and
# render the answer (programmatically when possible, otherwise via the LLM).
#
# Returns :handled when an answer was emitted to +yielder+.
# Returns :miss when the grammar is unavailable/disabled, the pipeline
# missed, or validation, execution or the sanity check failed. The caller
# then falls through to the LLM path. Misses are recorded via MissLogger on a
# best-effort basis.
# Re-raises when a failure occurs after answer tokens have started to reach
# the client: falling through at that point would append a second,
# LLM-generated answer to a partially streamed one.
def try_grammar_path(yielder, question, history)
  # Flipped right before the first answer token is yielded; read by the
  # method-level rescue below.
  answer_started = false

  registry = defined?(SqlChatbot) && SqlChatbot.respond_to?(:registry) ? SqlChatbot.registry : nil
  config = defined?(SqlChatbot) && SqlChatbot.respond_to?(:config) ? SqlChatbot.config : nil

  return :miss unless registry
  return :miss if config && config.respond_to?(:grammar_enabled) && config.grammar_enabled == false

  call_llm = ->(messages) { @llm.call(messages, json_mode: true) }
  # A nil config does not respond to the setting, so the default applies.
  threshold = config.respond_to?(:grammar_confidence_threshold) ? config.grammar_confidence_threshold : 0.7
  miss_log = resolved_miss_log_path(config)

  pipeline = GrammarPipeline.new(
    registry: registry,
    call_llm: call_llm,
    confidence_threshold: threshold,
    miss_log_path: miss_log
  )

  result = pipeline.try(question: question, history: history)

  unless result[:ok]
    yielder.yield({ type: "grammar_fallback", data: { reason: result[:reason] } })
    return :miss
  end

  sql = result[:sql]

  # Validate BEFORE emitting grammar_matched — if the SQL is bad, fall
  # through to the LLM path rather than showing the user a broken query.
  validation = SqlExecutor.validate_sql(sql)
  unless validation[:valid]
    log_grammar_miss(miss_log, {
      question: question,
      reason: "grammar_validation_failed: #{validation[:reason]}",
      extracted: result[:intent],
      resulting_sql: sql
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "grammar_validation_failed" } })
    return :miss
  end

  # Execute BEFORE emitting grammar_matched as well. If execution fails
  # (e.g., duplicate ORDER BY from a TOP_N + order_by quirk), the user
  # should not see a broken grammar response — fall through to the LLM.
  yielder.yield({ type: "executing" })
  begin
    db_result = SqlExecutor.execute_sql(validation[:sql])
  rescue => e
    log_error(e)
    log_grammar_miss(miss_log, {
      question: question,
      reason: "grammar_execution_error: #{e.class}: #{e.message.to_s.lines.first&.strip}",
      extracted: result[:intent],
      resulting_sql: validation[:sql]
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "grammar_execution_error" } })
    return :miss
  end

  # The intent hash may use symbol or string keys.
  intent = result[:intent] || {}
  intent_value = ->(key) { intent[key] || intent[key.to_s] }

  # Sanity check: for COUNT primitives, compare the result to the registry
  # row count. Catches "plausible but wrong" answers before the user sees
  # them.
  primitive = intent_value.call(:primitive).to_s
  entity_key = intent_value.call(:entity)
  entity_name = registry.aliases[entity_key] || entity_key
  sanity_entity = entity_name && registry.entities[entity_name.to_s]
  sanity =
    if sanity_entity
      SqlChatbot::Grammar::SanityCheck.check_count(primitive, sanity_entity, db_result[:rows])
    else
      { ok: true }
    end

  unless sanity[:ok]
    log_grammar_miss(miss_log, {
      question: question,
      reason: sanity[:reason],
      extracted: result[:intent],
      resulting_sql: validation[:sql]
    })
    yielder.yield({ type: "grammar_fallback", data: { reason: "count_mismatch" } })
    return :miss
  end

  # Only now — after successful validation AND execution AND sanity check —
  # commit to the grammar path by emitting grammar_matched and the SQL event.
  yielder.yield({ type: "grammar_matched", data: {} })
  yielder.yield({ type: "sql", query: validation[:sql], explanation: "grammar" })

  # Programmatic renderers bypass the answer LLM entirely; dispatch by
  # primitive happens inside ProgrammaticRenderer.
  render = SqlChatbot::Grammar::ProgrammaticRenderer.try_render(
    primitive,
    sanity_entity&.display_label,
    db_result[:rows],
    rank_field: intent_value.call(:rank_field),
    field: intent_value.call(:field),
    which: intent_value.call(:which)
  )
  if render[:ok]
    answer_started = true
    yielder.yield({ type: "token", content: render[:text] })
    return :handled
  end

  # Bug D guard: if the SQL returned an empty-shaped result that the
  # primitive's handler declined to render, the answer LLM tends to emit
  # "No matching records found." — wrong for COUNT(*)=0 and NULL
  # aggregates. Detect those shapes and render programmatically here.
  guarded = SqlChatbot::Grammar::ProgrammaticRenderer.render_empty_for_llm_sql(
    question, validation[:sql], db_result[:rows]
  )
  if guarded
    answer_started = true
    yielder.yield({ type: "token", content: guarded })
    return :handled
  end

  answer_messages = Prompts::Answer.build_messages(
    question: question,
    type: "data",
    sql_result: db_result[:rows],
    sql_query: validation[:sql],
    history: history
  )

  @llm.stream(answer_messages) do |chunk|
    answer_started = true
    yielder.yield({ type: "token", content: chunk })
  end

  :handled
rescue => e
  log_error(e)
  # Part of an answer is already with the client: surface the failure
  # instead of letting the caller start a second answer.
  raise if answer_started

  # Nothing was answered yet — fall through to the LLM path.
  :miss
end

# Best-effort write to the grammar miss log: a logging failure must never
# break question handling, so it is reported through #log_error and dropped.
def log_grammar_miss(miss_log, payload)
  SqlChatbot::Grammar::MissLogger.log(miss_log, payload)
rescue => e
  log_error(e)
  nil
end
582
+
583
# Picks the file that grammar misses are appended to, in priority order:
#   1. config.grammar_miss_log_path, when the config exposes a truthy value;
#   2. <Rails.root>/log/grammar-misses.ndjson, when Rails is loaded;
#   3. /tmp/grammar-misses.ndjson.
def resolved_miss_log_path(config)
  configured = config && config.respond_to?(:grammar_miss_log_path) && config.grammar_miss_log_path
  return configured if configured

  return Rails.root.join("log", "grammar-misses.ndjson").to_s if defined?(Rails) && Rails.respond_to?(:root) && Rails.root

  "/tmp/grammar-misses.ndjson"
end
592
+
593
# Builds the markdown list of application pages offered to the LLM for
# navigation/guidance answers.
#
# Routes are merged by path from three sources; a later source replaces an
# earlier entry for the same path:
#   1. code indexer routes (lowest priority, no labels),
#   2. manifest routes sent by the widget (may carry labels / parentPath),
#   3. route introspector data (highest priority).
# Only GET routes with a path are listed. A missing label falls back to the
# capitalized last path segment, or "Page".
#
# Returns "No application routes detected." when nothing remains to list.
def build_route_list
  routes_by_path = {}

  # 1. Code indexer routes (lowest priority)
  @code_indexer.get_routes.each do |r|
    routes_by_path[r[:path]] ||= { path: r[:path], method: r[:method], label: nil, source: "code_indexer" }
  end

  # 2. Manifest routes from widget (higher priority, has labels).
  # The manifest is accepted with string or symbol keys, so read both.
  manifest_routes = @manifest && (@manifest["routes"] || @manifest[:routes])
  Array(manifest_routes).each do |r|
    path = r["path"] || r[:path]
    routes_by_path[path] = {
      path: path,
      method: r["method"] || r[:method] || "GET",
      label: r["label"] || r[:label],
      parentPath: r["parentPath"] || r[:parentPath],
      source: "manifest"
    }
  end

  # 3. RouteIntrospector routes (highest priority for Rails apps)
  if @route_introspector_data
    @route_introspector_data.each do |r|
      routes_by_path[r[:path]] = r.merge(source: "introspector")
    end
  end

  # Compare the HTTP method case-insensitively ("get", :get and "GET" are
  # the same verb) and drop entries without a path, which cannot be listed.
  pages = routes_by_path.values.select { |r| r[:path] && r[:method].to_s.upcase == "GET" }

  # Covers both "no routes at all" and "routes exist but none is a GET
  # page", instead of returning a heading with an empty list.
  return "No application routes detected." if pages.empty?

  lines = pages.map do |r|
    parent_note = r[:parentPath] ? " (under #{r[:parentPath]})" : ""
    label = r[:label] || r[:path].to_s.split("/").last&.capitalize || "Page"
    "- #{r[:path]} \u2014 #{label}#{parent_note}"
  end

  "## Available Application Pages\n#{lines.join("\n")}"
end
633
+ end
634
+ end
635
+ end