sql-chatbot-rails 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +20 -0
- data/app/controllers/sql_chatbot/chatbot_controller.rb +158 -0
- data/config/routes.rb +11 -0
- data/lib/generators/sql_chatbot/install_generator.rb +25 -0
- data/lib/generators/sql_chatbot/templates/initializer.rb +22 -0
- data/lib/sql_chatbot/auth/cors.rb +35 -0
- data/lib/sql_chatbot/auth/jwt.rb +34 -0
- data/lib/sql_chatbot/configuration.rb +58 -0
- data/lib/sql_chatbot/engine.rb +23 -0
- data/lib/sql_chatbot/grammar/count_renderer.rb +113 -0
- data/lib/sql_chatbot/grammar/entity_candidates.rb +210 -0
- data/lib/sql_chatbot/grammar/intent_extractor.rb +191 -0
- data/lib/sql_chatbot/grammar/list_renderer.rb +50 -0
- data/lib/sql_chatbot/grammar/miss_logger.rb +17 -0
- data/lib/sql_chatbot/grammar/modifiers.rb +145 -0
- data/lib/sql_chatbot/grammar/primitives.rb +69 -0
- data/lib/sql_chatbot/grammar/programmatic_renderer.rb +258 -0
- data/lib/sql_chatbot/grammar/registry.rb +66 -0
- data/lib/sql_chatbot/grammar/sanity_check.rb +37 -0
- data/lib/sql_chatbot/grammar/template_compiler.rb +179 -0
- data/lib/sql_chatbot/llm/client.rb +87 -0
- data/lib/sql_chatbot/prompts/answer.rb +157 -0
- data/lib/sql_chatbot/prompts/classify.rb +59 -0
- data/lib/sql_chatbot/prompts/generate_sql.rb +88 -0
- data/lib/sql_chatbot/services/code_indexer.rb +337 -0
- data/lib/sql_chatbot/services/grammar_pipeline.rb +45 -0
- data/lib/sql_chatbot/services/model_introspector.rb +152 -0
- data/lib/sql_chatbot/services/orchestrator.rb +635 -0
- data/lib/sql_chatbot/services/registry_builder.rb +385 -0
- data/lib/sql_chatbot/services/route_introspector.rb +118 -0
- data/lib/sql_chatbot/services/schema_service.rb +884 -0
- data/lib/sql_chatbot/services/sql_executor.rb +81 -0
- data/lib/sql_chatbot/version.rb +5 -0
- data/lib/sql_chatbot_rails.rb +91 -0
- data/vendor/assets/widget.js +53 -0
- metadata +180 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sql_chatbot/grammar/registry"
|
|
4
|
+
|
|
5
|
+
module SqlChatbot
|
|
6
|
+
module Grammar
|
|
7
|
+
module Primitives
  # Columns preferred for human-readable output, in priority order.
  PREFERRED_DISPLAY_FIELDS = %w[id name title label email].freeze
  # Row limit used by TOP_N when the caller supplies no `n`.
  DEFAULT_TOP_N = 10
  # The only keywords allowed after ORDER BY <column>.
  SORT_DIRECTIONS = %w[ASC DESC].freeze
  # The only aggregate names MIN_MAX may emit.
  MIN_MAX_FUNCTIONS = %w[MIN MAX].freeze

  # Quote a single SQL identifier — wraps in double quotes and escapes
  # embedded quotes. Prevents PG reserved-word collisions ("user", "order"
  # etc.) which silently resolve to functions and corrupt counts.
  def self.q(name)
    %("#{name.to_s.gsub('"', '""')}")
  end

  # Qualified column reference: "table"."column"
  def self.qc(table, col)
    "#{q(table)}.#{q(col)}"
  end

  # Build the base SELECT statement for one grammar primitive.
  #
  # primitive  - Symbol or String: COUNT, LIST, SUM, AVG, MIN_MAX, TOP_N, RANK.
  # entity     - object responding to #table, #fields and #ranking_candidates.
  # field      - column for SUM / AVG / MIN_MAX (must be a key of entity.fields).
  # which      - MIN or MAX (Symbol or String, any case) for MIN_MAX.
  # n          - row limit for TOP_N (integer or integer-like string).
  # rank_field - ordering column for TOP_N / RANK.
  # direction  - "asc" or "desc" for TOP_N (defaults to desc).
  # group_by   - partition column for RANK.
  #
  # Returns the SQL String. Raises RuntimeError when a required slot is
  # missing or carries a value that cannot be emitted safely.
  def self.build(primitive:, entity:, field: nil, which: nil, n: nil, rank_field: nil, direction: nil, group_by: nil)
    table = q(entity.table)
    case primitive.to_s
    when "COUNT"
      "SELECT COUNT(*) FROM #{table}"
    when "LIST"
      "SELECT #{display_select_list(entity)} FROM #{table}"
    when "SUM"
      require_field!(entity, field, "SUM")
      "SELECT SUM(#{qc(entity.table, field)}) FROM #{table}"
    when "AVG"
      require_field!(entity, field, "AVG")
      "SELECT ROUND(AVG(#{qc(entity.table, field)}), 2) FROM #{table}"
    when "MIN_MAX"
      require_field!(entity, field, "MIN_MAX")
      function = which.to_s.upcase
      raise "MIN_MAX requires which" unless MIN_MAX_FUNCTIONS.include?(function)
      "SELECT #{function}(#{qc(entity.table, field)}) FROM #{table}"
    when "TOP_N"
      rank = rank_field || entity.ranking_candidates.first
      raise "TOP_N requires rankField" unless rank
      # Both values are spliced into the statement unquoted, so they are
      # validated first instead of being trusted as-is.
      limit = top_n_limit(n)
      dir = sort_direction(direction)
      rank_col = qc(entity.table, rank)
      # V1.3-V: NULLS LAST in both directions. PG default for DESC is
      # NULLS FIRST, which surfaced a NULL-rank row for 2BN's
      # "biggest review by rating" — answer LLM rendered "N/A".
      # NULL rank is never the answer the user wants.
      "SELECT #{display_select_list(entity)}, #{rank_col} FROM #{table} ORDER BY #{rank_col} #{dir} NULLS LAST LIMIT #{limit}"
    when "RANK"
      raise "RANK requires rankField and groupBy" unless rank_field && group_by
      "SELECT #{table}.*, DENSE_RANK() OVER (PARTITION BY #{qc(entity.table, group_by)} ORDER BY #{qc(entity.table, rank_field)} DESC) AS rank FROM #{table}"
    else
      raise "unknown primitive #{primitive}"
    end
  end

  # Column names to show for an entity: the preferred display fields that
  # exist, otherwise the first four registered fields. Both paths return
  # column names (the registry key is used only when a field has no column).
  def self.pick_display_fields(entity)
    present = PREFERRED_DISPLAY_FIELDS.select { |p| entity.fields.key?(p) }
    return present.map { |p| entity.fields[p].column } if present.any?

    entity.fields.first(4).map { |key, registered| registered.column || key }
  end

  # Raise unless `field` is given and is a key of entity.fields.
  # `name` is the primitive name used in the error message.
  def self.require_field!(entity, field, name)
    raise "#{name} requires field" unless field
    raise "#{name} field '#{field}' not in entity" unless entity.fields.key?(field.to_s)
  end

  # Quoted, comma-separated select list for LIST / TOP_N. Falls back to
  # "table".* when the entity has no fields, so the statement stays valid.
  def self.display_select_list(entity)
    columns = pick_display_fields(entity)
    return "#{q(entity.table)}.*" if columns.empty?

    columns.map { |c| q(c) }.join(", ")
  end

  # Coerce the TOP_N limit to a non-negative Integer (default 10).
  # Raises RuntimeError for anything that is not integer-like.
  def self.top_n_limit(n)
    return DEFAULT_TOP_N unless n

    limit = n.is_a?(Numeric) ? n.to_i : Integer(n.to_s.strip, 10)
    raise "TOP_N requires a non-negative n" if limit.negative?

    limit
  rescue ArgumentError, TypeError, FloatDomainError
    raise "TOP_N n must be an integer, got #{n.inspect}"
  end

  # Normalise a sort direction to "ASC" or "DESC" (default DESC).
  # Raises RuntimeError for any other value.
  def self.sort_direction(direction)
    dir = (direction || "desc").to_s.strip.upcase
    raise "TOP_N direction must be ASC or DESC" unless SORT_DIRECTIONS.include?(dir)

    dir
  end
end
|
|
68
|
+
end
|
|
69
|
+
end
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sql_chatbot/grammar/count_renderer"
|
|
4
|
+
require "sql_chatbot/grammar/list_renderer"
|
|
5
|
+
|
|
6
|
+
module SqlChatbot
|
|
7
|
+
module Grammar
|
|
8
|
+
# Unified programmatic-renderer registry. Mirror of npm's
|
|
9
|
+
# programmatic-renderer.ts. The grammar's intent extractor emits a
|
|
10
|
+
# primitive (COUNT, LIST, TOP_N, …); this module renders the result
|
|
11
|
+
# deterministically when it can — bypassing the answer-stream LLM
|
|
12
|
+
# which truncates lists, mis-narrates count=0 as "no records", and
|
|
13
|
+
# hallucinates empty TOP_N results when the metric is all zero.
|
|
14
|
+
#
|
|
15
|
+
# Adding a new primitive renderer (SUM, AVG, MIN_MAX, RANK) means:
|
|
16
|
+
# write a pure handler and add it to HANDLERS. No new files.
|
|
17
|
+
module ProgrammaticRenderer
  # Largest TOP_N result that is rendered programmatically.
  TOPN_THRESHOLD = 10
  # Row keys tried, in order, when picking a readable label for a row.
  PREFERRED_LABEL_KEYS = %w[title name label subject email username].freeze
  # Field-name fragments that make an aggregate render as a dollar amount.
  MONEY_HINTS = %w[amount price cost revenue total gross net fee].freeze
  # Plain decimal number held in a String (no exponent, no separators).
  NUMERIC_STRING_RE = /\A-?\d+(\.\d+)?\z/

  COUNT_RE = /SELECT\s+COUNT\s*\(/i
  AGGREGATE_RE = /SELECT\s+(SUM|AVG|MIN|MAX)\s*\(/i
  FROM_TABLE_RE = /\bFROM\s+(?:"?[\w$]+"?\s*\.\s*)?"?([\w$]+)"?/i

  # Render `rows` for `primitive` without the answer LLM when possible.
  #
  # Returns { ok: true, text: String } when a handler produced the answer,
  # or { ok: false } when the primitive is nil/unknown or the handler
  # declined (the caller is expected to fall back to another renderer).
  def self.try_render(primitive, entity_display_label, rows, rank_field: nil, field: nil, which: nil)
    handler = primitive.nil? ? nil : HANDLERS[primitive.to_s]
    return { ok: false } unless handler

    handler.call(entity_display_label, rows, rank_field, field, which)
  end

  # COUNT — one row, one numeric `count` column. Delegates to existing
  # CountRenderer (already in unified shape).
  HANDLE_COUNT = ->(label, rows, _rank, _field, _which) {
    CountRenderer.try_render("COUNT", label, rows)
  }

  # LIST — small result, picks readable label per row. Delegates.
  HANDLE_LIST = ->(label, rows, _rank, _field, _which) {
    ListRenderer.try_render("LIST", label, rows)
  }

  # TOP_N — small ordered result, label + rank value per row. The
  # Gitea sweep failure: "biggest repo by stars" returned 5 rows with
  # all num_stars=0; the answer LLM rendered "No matching records."
  # Programmatic render shows the data deterministically.
  HANDLE_TOPN = ->(label, rows, rank_field, _field, _which) {
    ProgrammaticRenderer.render_top_n(label, rows, rank_field)
  }

  # SUM / AVG / MIN_MAX — one row, one numeric value. Mirror of TS
  # handlers in programmatic-renderer.ts (V1.3-P / Fix G).
  HANDLE_SUM = ->(label, rows, _rank, field, _which) {
    ProgrammaticRenderer.render_aggregate(label, rows, field, "Total", "No data available to sum.")
  }

  HANDLE_AVG = ->(label, rows, _rank, field, _which) {
    ProgrammaticRenderer.render_aggregate(label, rows, field, "Average", "No data available to average.")
  }

  HANDLE_MIN_MAX = ->(label, rows, _rank, field, which) {
    heading = which.to_s == "MIN" ? "Lowest" : "Highest"
    ProgrammaticRenderer.render_aggregate(label, rows, field, heading, "No data available.")
  }

  HANDLERS = {
    "COUNT" => HANDLE_COUNT,
    "LIST" => HANDLE_LIST,
    "TOP_N" => HANDLE_TOPN,
    "SUM" => HANDLE_SUM,
    "AVG" => HANDLE_AVG,
    "MIN_MAX" => HANDLE_MIN_MAX,
  }.freeze

  # Render a TOP_N result as an intro line plus one "- label (rank)" line
  # per row. Declines ({ ok: false }) when rows is not an Array, exceeds
  # TOPN_THRESHOLD, or ANY row is not a Hash or has no usable label —
  # a partial list under a "Top N" heading would misstate the result.
  def self.render_top_n(label, rows, rank_field)
    return { ok: false } unless rows.is_a?(Array)
    return { ok: true, text: empty_text(label) } if rows.empty?
    return { ok: false } if rows.length > TOPN_THRESHOLD

    entries = rows.map { |row| top_n_entry(row, rank_field) }
    return { ok: false } if entries.any?(&:nil?)

    base_label = label.to_s.empty? ? "item" : label.to_s
    by_part = rank_field ? " by #{rank_field}" : ""
    intro =
      if rows.length == 1
        "Top #{CountRenderer.to_singular_label(base_label)}#{by_part}:"
      else
        "Top #{rows.length} #{CountRenderer.to_plural_label(base_label)}#{by_part}:"
      end
    lines = entries.map { |e| e[:rank] ? "- #{e[:label]} (#{e[:rank]})" : "- #{e[:label]}" }
    { ok: true, text: "#{intro}\n#{lines.join("\n")}" }
  end

  # { label:, rank: } for one TOP_N row, or nil when the row is unusable.
  def self.top_n_entry(row, rank_field)
    return nil unless row.is_a?(Hash)

    label = pick_row_label(row)
    label && { label: label, rank: pick_rank_value(row, rank_field) }
  end

  # Shared body of the SUM / AVG / MIN_MAX handlers.
  #
  # Declines when rows is not a single one-column row. A NULL aggregate
  # renders `empty_text`. A non-NULL value that cannot be formatted as a
  # number (e.g. the MAX of a date column) also declines rather than
  # claiming there is no data.
  def self.render_aggregate(label, rows, field, heading, empty_text)
    value = first_aggregate_value(rows)
    return { ok: false } if value == :no_value
    return { ok: true, text: empty_text } if value.nil?

    formatted = format_aggregate_value(field, value)
    return { ok: false } if formatted.nil?

    plural = CountRenderer.to_plural_label(label.to_s.empty? ? "item" : label.to_s)
    { ok: true, text: "#{heading} #{humanize_field(field)} across #{plural}: #{formatted}" }
  end

  # The single value of a one-row, one-column result, or :no_value when
  # the result does not have exactly that shape. The value may be nil.
  def self.first_aggregate_value(rows)
    return :no_value unless rows.is_a?(Array) && rows.length == 1

    row = rows.first
    return :no_value unless row.is_a?(Hash) && row.size == 1

    row.values.first
  end

  # Format a numeric aggregate with thousands separators, rounded to two
  # decimals. Money-like fields get a "$" prefix and always two decimals;
  # other fields drop trailing zero decimals. Negative values keep their
  # sign. Returns nil for nil, non-numeric or non-finite input.
  def self.format_aggregate_value(field, value)
    number =
      case value
      when Numeric then value
      when String then NUMERIC_STRING_RE.match?(value) ? value.to_f : nil
      end
    return nil if number.nil? || !number.finite?

    # Work in whole hundredths so rounding carries into the integer part
    # (1.999 -> 2.00) instead of overflowing the fraction.
    hundredths = (number * 100).round
    sign = hundredths.negative? ? "-" : ""
    whole, frac = hundredths.abs.divmod(100)
    grouped = whole.to_s.reverse.scan(/\d{1,3}/).join(",").reverse
    frac_text = frac.to_s.rjust(2, "0")

    return "#{sign}$#{grouped}.#{frac_text}" if looks_like_money?(field)

    frac_text = frac_text.sub(/0+\z/, "")
    frac_text.empty? ? "#{sign}#{grouped}" : "#{sign}#{grouped}.#{frac_text}"
  end

  # True when the field name contains one of MONEY_HINTS (case-insensitive).
  def self.looks_like_money?(field)
    return false if field.nil?

    name = field.to_s.downcase
    MONEY_HINTS.any? { |hint| name.include?(hint) }
  end

  # Field name with underscores replaced by spaces; "value" when blank.
  def self.humanize_field(field)
    return "value" if field.nil? || field.to_s.empty?

    field.to_s.tr("_", " ")
  end

  # First non-blank String found under PREFERRED_LABEL_KEYS (String or
  # Symbol key), else the first non-blank String value of any column other
  # than "id". Returns the stripped String, or nil when there is none.
  def self.pick_row_label(row)
    PREFERRED_LABEL_KEYS.each do |key|
      candidate = row[key] || row[key.to_sym]
      return candidate.strip if candidate.is_a?(String) && !candidate.strip.empty?
    end
    row.each do |key, candidate|
      next if key.to_s == "id"
      return candidate.strip if candidate.is_a?(String) && !candidate.strip.empty?
    end
    nil
  end

  # Value shown in parentheses for a TOP_N row: the rank_field column when
  # present, else the first numeric-looking column other than id/count.
  def self.pick_rank_value(row, rank_field)
    if rank_field
      ranked = row[rank_field] || row[rank_field.to_s] || row[rank_field.to_sym]
      return format_scalar(ranked) if ranked
    end
    row.each do |key, candidate|
      next if %w[id count].include?(key.to_s)

      numeric = candidate.is_a?(Numeric) || (candidate.is_a?(String) && NUMERIC_STRING_RE.match?(candidate))
      return format_scalar(candidate) if numeric
    end
    nil
  end

  # String form of a Numeric or String scalar; nil for anything else.
  def self.format_scalar(v)
    case v
    when Numeric then v.to_s
    when String then v
    end
  end

  # =========================================================
  # Bug D guard for the LLM SQL path. Mirror of TS
  # renderEmptyForLlmSql. Hard sweep on 2026-04-28 surfaced
  # five questions where grammar missed → LLM SQL → 0 rows /
  # COUNT(*)=0 / NULL aggregate → answer LLM emitted
  # "No matching records found." even when a shape-aware
  # programmatic rendering ("There are 0 X.", "No Xs found.")
  # would be more honest. Returns nil when there *is* data —
  # caller falls through to the answer LLM.
  # =========================================================
  def self.render_empty_for_llm_sql(_question, sql, rows)
    return nil unless rows.is_a?(Array)

    sql_text = sql.to_s # tolerate a nil statement
    if rows.length == 1
      scalar = first_scalar(rows.first)
      if COUNT_RE.match?(sql_text)
        return nil unless as_number(scalar) == 0

        label = entity_from_from(sql_text)
        return label ? "There are 0 #{CountRenderer.to_plural_label(label)}." : "There are 0 matching items."
      end
      if AGGREGATE_RE.match?(sql_text)
        return scalar.nil? ? "No data available for that question." : nil
      end
    end
    return nil unless rows.empty?

    label = entity_from_from(sql_text)
    label ? "No #{CountRenderer.to_plural_label(label)} found." : "I didn't find anything matching that — could you rephrase or be more specific?"
  end

  # First column value of a non-empty Hash row, else nil.
  def self.first_scalar(row)
    return nil unless row.is_a?(Hash) && !row.empty?

    row.values.first
  end

  # Numeric passthrough, or Integer for an integer-only String; else nil.
  def self.as_number(v)
    return v if v.is_a?(Numeric)
    return v.to_i if v.is_a?(String) && /\A-?\d+\z/.match?(v)

    nil
  end

  # Title-cased label derived from the first FROM table in the statement
  # ("line_items" -> "Line Items"), or nil when no table name is found.
  def self.entity_from_from(sql)
    table = sql.to_s[FROM_TABLE_RE, 1]
    return nil unless table

    label = table.split("_").reject(&:empty?).map(&:capitalize).join(" ")
    label.empty? ? nil : label
  end

  # Shared empty-text helper used by HANDLE_TOPN, ListRenderer, and
  # render_empty_for_llm_sql so the user-facing string is consistent:
  # "No <Plural> found." instead of the V1.2 Bug D phrase
  # "No matching records found." which the user has flagged on the
  # 2026-04-28 hard sweep as misleading.
  def self.empty_text(entity_display_label)
    if entity_display_label && !entity_display_label.to_s.strip.empty?
      "No #{CountRenderer.to_plural_label(entity_display_label.to_s)} found."
    else
      "I didn't find anything matching that — could you rephrase or be more specific?"
    end
  end
end
|
|
257
|
+
end
|
|
258
|
+
end
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module SqlChatbot
|
|
6
|
+
module Grammar
|
|
7
|
+
# One registry entry describing a queryable table. Built from keyword
# arguments; unknown keywords are ignored and omitted ones get defaults
# (display_label falls back to the capitalized name, row_count to 0,
# primary_key to "id", hash-like members to {}, list-like members to []).
Entity = Struct.new(
  :name, :table, :display_label, :row_count, :primary_key, :timestamps,
  :fields, :scopes, :associations, :ranking_candidates, :implicit_filters,
  keyword_init: true
) do
  def initialize(**kwargs)
    entity_name = kwargs[:name]
    attrs = {
      name: entity_name,
      table: kwargs[:table],
      display_label: kwargs[:display_label] || entity_name&.capitalize,
      row_count: kwargs[:row_count] || 0,
      primary_key: kwargs[:primary_key] || "id",
    }
    # Fresh containers per instance so entities never share a default.
    %i[timestamps fields scopes associations].each { |member| attrs[member] = kwargs[member] || {} }
    %i[ranking_candidates implicit_filters].each { |member| attrs[member] = kwargs[member] || [] }
    super(**attrs)
  end
end
|
|
27
|
+
|
|
28
|
+
# A WHERE clause appended to every SELECT for an entity. Entries come from
# two places:
#   - Schema detection: e.g. `deleted_at` → `{ column: "deleted_at", expr: "IS NULL", source: :soft_delete }`
#   - Developer config (`default_filters`): e.g. MSP convention `*.status != 3`.
# The compiler renders each entry as `"<table>"."<column>" <expr>` and
# leaves out any entry whose column the generated SQL already references.
ImplicitFilter = Struct.new(*%i[column expr source], keyword_init: true)

# Metadata for one column of an entity.
Field = Struct.new(
  *%i[column type nullable enum_values fk_to user_facing_label searchable],
  keyword_init: true
)

# A named, reusable WHERE clause with its parameter slots.
Scope = Struct.new(*%i[name where_clause param_slots], keyword_init: true)

# A relationship from one entity to another, including the JOIN to use.
Association = Struct.new(
  *%i[name kind target_entity join_clause through_entity],
  keyword_init: true
)
|
|
43
|
+
|
|
44
|
+
# In-memory catalogue of entities and their aliases for one framework.
class Registry
  attr_reader :entities, :aliases, :version, :generated_at, :framework

  # framework - identifier of the host framework (stored as given).
  # entities  - Hash of entity name => entity.
  # aliases   - Hash of alias term => entity name.
  # The version is fixed at 1 and generated_at is the UTC creation time
  # as an ISO-8601 string.
  def initialize(framework:, entities: {}, aliases: {})
    @framework, @entities, @aliases = framework, entities, aliases
    @version = 1
    @generated_at = Time.now.utc.iso8601
  end

  # Entity stored under `name` (compared as a String), or nil.
  def find_entity(name)
    @entities[name.to_s]
  end

  # Entity name for `term`: the alias target when `term` is an alias,
  # `term` itself when it already names an entity, otherwise nil.
  def resolve_alias(term)
    key = term.to_s
    if @aliases.key?(key)
      @aliases[key]
    elsif @entities.key?(key)
      key
    end
  end
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlChatbot
|
|
4
|
+
module Grammar
|
|
5
|
+
# Post-execution sanity check for grammar-generated SQL.
|
|
6
|
+
#
|
|
7
|
+
# Catches "plausible but wrong" results where the SQL ran without error
|
|
8
|
+
# but the value disagrees with the registry's known row count. Concrete
|
|
9
|
+
# case: Gitea's `user` reserved-word table returned 1 instead of 9.
|
|
10
|
+
module SanityCheck
  # Registry row counts at or below this are not trusted as a baseline.
  MIN_TRUSTED_ROW_COUNT = 5
  # A result is suspicious when it is this many times smaller or larger
  # than the registry's row count.
  TOLERANCE_FACTOR = 3

  # Compare a COUNT result against the registry's known row count.
  #
  # primitive   - grammar primitive; anything other than COUNT passes.
  # entity      - object responding to #row_count and #table.
  # result_rows - rows returned by the SQL; only a single-row Array whose
  #               first column parses as an integer is checked.
  #
  # Returns { ok: true } when result matches expectations (or cannot be
  # checked), or { ok: false, reason: "..." } when there's a mismatch.
  def self.check_count(primitive, entity, result_rows)
    return { ok: true } unless primitive.to_s == "COUNT"
    return { ok: true } unless result_rows.is_a?(Array) && result_rows.length == 1

    row = result_rows.first
    got = parse_count(row.is_a?(Hash) ? row.values.first : nil)
    return { ok: true } if got.nil?

    expected = entity.row_count.to_i
    # Trust the registry only when it has a non-trivial value.
    # Tables with reltuples == 0 might just be stats-stale.
    return { ok: true } if expected <= MIN_TRUSTED_ROW_COUNT

    within_range = got >= expected / TOLERANCE_FACTOR && got <= expected * TOLERANCE_FACTOR
    return { ok: true } if within_range

    {
      ok: false,
      reason: "count_mismatch: SQL returned #{got}, registry has ~#{expected} rows in #{entity.table}",
    }
  end

  # Integer value of a count cell, or nil when it is not an integer.
  # Strings are parsed strictly as base 10, so a zero-padded value such as
  # "010" is ten rather than octal eight.
  def self.parse_count(value)
    return value if value.is_a?(Integer)

    Integer(value.to_s, 10, exception: false)
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "sql_chatbot/grammar/registry"
|
|
4
|
+
require "sql_chatbot/grammar/primitives"
|
|
5
|
+
require "sql_chatbot/grammar/modifiers"
|
|
6
|
+
|
|
7
|
+
module SqlChatbot
|
|
8
|
+
module Grammar
|
|
9
|
+
module TemplateCompiler
  # Primitives whose rank/field may arrive as an `order_by` modifier.
  RANKED_PRIMITIVES = %w[TOP_N MIN_MAX].freeze
  # Intent slots that the lift step writes back, in write order.
  LIFTED_SLOTS = %i[rank_field field which direction].freeze

  # Compile an extracted intent into SQL.
  #
  # intent   - Hash with Symbol keys (:status, :reason, :entity, :primitive,
  #            :modifiers, :field, :which, :direction, :rank_field, :n,
  #            :group_by). For TOP_N / MIN_MAX the lifted slots and the
  #            remaining modifiers are written back into it.
  # registry - object responding to #aliases and #entities (Hashes).
  #
  # Returns { ok: true, sql: String } or { ok: false, reason: String }.
  # Any StandardError raised while building the SQL is reported as the
  # reason rather than propagated.
  def self.compile(intent, registry)
    return { ok: false, reason: "unmatched: #{intent[:reason]}" } if intent[:status].to_s == "unmatched"

    entity = lookup_entity(intent[:entity], registry)
    return { ok: false, reason: "entity '#{intent[:entity]}' not in registry" } unless entity

    begin
      primitive_name = intent[:primitive].to_s

      # V1.3-K shape-repair pass — three real-world LLM emit-fidelity issues
      # captured during the 2026-04-28 hard sweep. Mirror of TS
      # `normalizeIntentShape` in template-compiler.ts.
      modifiers = normalize_intent_shape(Array(intent[:modifiers]), entity)

      slots = {
        rank_field: intent[:rank_field],
        field: intent[:field],
        which: intent[:which],
        direction: intent[:direction],
        n: intent[:n],
      }
      # The builder matches MIN/MAX as symbols; accept the String spelling.
      slots[:which] = slots[:which].upcase.to_sym if slots[:which].is_a?(String)

      # TOP_N / MIN_MAX express the rank/field through an `order_by`
      # modifier when the LLM phrases it that way ("biggest repo by stars"
      # → primitive=TOP_N + order_by(num_stars,desc)). Lift the
      # modifier's field into the primitive slot and drop it so the
      # compiler doesn't emit ORDER BY twice (which produces a Postgres
      # syntax error and silently pushes the question to the LLM
      # fallback path). Write the lifted values back to the intent so
      # downstream consumers (programmatic renderer) can read the
      # canonical rank_field/field regardless of which surface the LLM
      # emitted.
      if RANKED_PRIMITIVES.include?(primitive_name)
        modifiers = lift_rank_modifiers(primitive_name, modifiers, slots)
        LIFTED_SLOTS.each { |slot| intent[slot] = slots[slot] }
        intent[:modifiers] = modifiers
        intent[:n] = slots[:n]
      end

      sql = Primitives.build(
        # Always hand the builder a Symbol, matching the string comparisons above.
        primitive: primitive_name.to_sym,
        entity: entity,
        field: slots[:field],
        which: slots[:which],
        n: slots[:n],
        rank_field: slots[:rank_field],
        direction: slots[:direction],
        group_by: intent[:group_by]
      )
      sql = modifiers.reduce(sql) { |statement, modifier| Modifiers.apply(statement, modifier, entity) }
      sql = with_implicit_filters(sql, entity)

      # Cap row-returning statements; counts never need a LIMIT.
      unbounded = !sql.match?(/LIMIT \d+/i) && primitive_name != "COUNT" && !sql.match?(/COUNT\(/i)
      sql = "#{sql} LIMIT 100" if unbounded

      { ok: true, sql: sql }
    rescue => e
      { ok: false, reason: e.message }
    end
  end

  # Resolve the intent's entity through the registry aliases. The raw key
  # is tried first and then its String form, so a Symbol entity name finds
  # String-keyed registry entries.
  def self.lookup_entity(raw_name, registry)
    name = registry.aliases[raw_name] || registry.aliases[raw_name.to_s] || raw_name
    registry.entities[name] || registry.entities[name.to_s]
  end

  # Move `order_by` (and, for TOP_N, `limit`) modifiers into `slots`.
  # Slots that already hold a value are left alone. Mutates `slots` and
  # returns the modifiers that still have to be applied.
  def self.lift_rank_modifiers(primitive_name, modifiers, slots)
    order_mod = modifiers.find { |m| modifier_value(m, :kind).to_s == "order_by" }
    if order_mod
      lifted_field = modifier_value(order_mod, :field)
      raw_dir = (modifier_value(order_mod, :direction) || modifier_value(order_mod, :op) || "desc").to_s.downcase
      if primitive_name == "TOP_N"
        slots[:rank_field] ||= lifted_field
        # V1.3-U: pass through the lifted direction so "smallest X"
        # becomes ASC, mirroring TS liftOrderByForRank.
        slots[:direction] ||= raw_dir
      else
        slots[:field] ||= lifted_field
        slots[:which] ||= (raw_dir == "asc" ? :MIN : :MAX)
      end
      modifiers = modifiers.reject { |m| m.equal?(order_mod) }
    end

    limit_mod = modifiers.find { |m| modifier_value(m, :kind).to_s == "limit" }
    if limit_mod && primitive_name == "TOP_N"
      slots[:n] ||= modifier_value(limit_mod, :value)
      modifiers = modifiers.reject { |m| m.equal?(limit_mod) }
    end
    modifiers
  end

  # Read a modifier attribute stored under either a Symbol or String key.
  def self.modifier_value(modifier, key)
    modifier[key] || modifier[key.to_s]
  end

  # Append every implicit WHERE clause for this entity. Two sources land
  # in the same list: schema-detected soft-delete columns and developer-
  # declared `default_filters`. A clause is skipped when its column is
  # already referenced in the generated SQL — explicit user filters always
  # win over the implicit default.
  #
  # NOTE(review): when the SQL already has a WHERE, the implicit clauses
  # are AND-ed in front of it without parentheses — this assumes the
  # existing condition contains no top-level OR; confirm against Modifiers.
  def self.with_implicit_filters(sql, entity)
    filters = Array(entity.respond_to?(:implicit_filters) ? entity.implicit_filters : nil)
    pending = filters.reject { |f| column_already_filtered?(sql, entity.table, f.column) }
    return sql if pending.empty?

    combined = pending.map { |f| "#{Primitives.qc(entity.table, f.column)} #{f.expr}" }.join(" AND ")
    # Consume the whitespace after WHERE so the result has single spacing.
    return sql.sub(/\bWHERE\b\s*/i) { "WHERE #{combined} AND " } if /\bWHERE\b/i.match?(sql)

    boundary = sql.match(/ (GROUP BY|ORDER BY|LIMIT) /i)
    return "#{sql} WHERE #{combined}" unless boundary

    "#{boundary.pre_match} WHERE #{combined}#{boundary[0]}#{boundary.post_match}"
  end

  # True when the SQL already mentions table.column, quoted or bare.
  def self.column_already_filtered?(sql, table, col)
    return true if sql.include?(Primitives.qc(table, col))

    /\b#{Regexp.escape(table.to_s)}\.#{Regexp.escape(col.to_s)}\b/i.match?(sql)
  end

  # =====================================================================
  # V1.3-K shape-repair pass — mirror of TS normalizeIntentShape.
  #
  #   1. `where` modifier whose value is a WINDOWS keyword
  #      → coerce to a `time` modifier with that window.
  #   2. `time` modifier where the LLM used `value` for the keyword
  #      instead of `window` → alias the key.
  #   3. `time` modifier missing field entirely → default to created_at
  #      if it exists, else the lone timestamp column.
  # =====================================================================
  def self.normalize_intent_shape(modifiers, entity)
    modifiers.map { |m| repair_one(m, entity) }
  end

  # Apply the three repair rules to one modifier. Returns a new Hash when
  # a rule fires, otherwise the modifier object itself.
  def self.repair_one(modifier, entity)
    kind = modifier_value(modifier, :kind).to_s
    value = modifier_value(modifier, :value)
    field = modifier_value(modifier, :field)
    window = modifier_value(modifier, :window)

    # Rules 1 and 2 yield the same repaired shape: a `where`, or a `time`
    # without a window, whose value is a WINDOWS keyword.
    windowless_time = kind == "time" && (window.nil? || window.to_s.empty?)
    if (kind == "where" || windowless_time) && value.is_a?(String) && Modifiers::WINDOWS.key?(value.downcase)
      return {
        kind: "time",
        field: field || default_timestamp_field(entity) || "",
        window: value.downcase,
      }
    end

    # Rule 3: time missing field → default
    if kind == "time" && (field.nil? || field.to_s.empty?) && window
      fallback = default_timestamp_field(entity)
      return { kind: "time", field: fallback, window: window } if fallback
    end

    modifier
  end

  # "created_at" when the entity has that field, else the column of its
  # only timestamp-typed field, else nil.
  def self.default_timestamp_field(entity)
    return "created_at" if entity.fields["created_at"]

    timestamp_fields = entity.fields.values.select { |f| f.type.to_s == "timestamp" }
    timestamp_fields.length == 1 ? timestamp_fields.first.column : nil
  end
end
|
|
178
|
+
end
|
|
179
|
+
end
|