sql-chatbot-rails 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +20 -0
  4. data/app/controllers/sql_chatbot/chatbot_controller.rb +158 -0
  5. data/config/routes.rb +11 -0
  6. data/lib/generators/sql_chatbot/install_generator.rb +25 -0
  7. data/lib/generators/sql_chatbot/templates/initializer.rb +22 -0
  8. data/lib/sql_chatbot/auth/cors.rb +35 -0
  9. data/lib/sql_chatbot/auth/jwt.rb +34 -0
  10. data/lib/sql_chatbot/configuration.rb +58 -0
  11. data/lib/sql_chatbot/engine.rb +23 -0
  12. data/lib/sql_chatbot/grammar/count_renderer.rb +113 -0
  13. data/lib/sql_chatbot/grammar/entity_candidates.rb +210 -0
  14. data/lib/sql_chatbot/grammar/intent_extractor.rb +191 -0
  15. data/lib/sql_chatbot/grammar/list_renderer.rb +50 -0
  16. data/lib/sql_chatbot/grammar/miss_logger.rb +17 -0
  17. data/lib/sql_chatbot/grammar/modifiers.rb +145 -0
  18. data/lib/sql_chatbot/grammar/primitives.rb +69 -0
  19. data/lib/sql_chatbot/grammar/programmatic_renderer.rb +258 -0
  20. data/lib/sql_chatbot/grammar/registry.rb +66 -0
  21. data/lib/sql_chatbot/grammar/sanity_check.rb +37 -0
  22. data/lib/sql_chatbot/grammar/template_compiler.rb +179 -0
  23. data/lib/sql_chatbot/llm/client.rb +87 -0
  24. data/lib/sql_chatbot/prompts/answer.rb +157 -0
  25. data/lib/sql_chatbot/prompts/classify.rb +59 -0
  26. data/lib/sql_chatbot/prompts/generate_sql.rb +88 -0
  27. data/lib/sql_chatbot/services/code_indexer.rb +337 -0
  28. data/lib/sql_chatbot/services/grammar_pipeline.rb +45 -0
  29. data/lib/sql_chatbot/services/model_introspector.rb +152 -0
  30. data/lib/sql_chatbot/services/orchestrator.rb +635 -0
  31. data/lib/sql_chatbot/services/registry_builder.rb +385 -0
  32. data/lib/sql_chatbot/services/route_introspector.rb +118 -0
  33. data/lib/sql_chatbot/services/schema_service.rb +884 -0
  34. data/lib/sql_chatbot/services/sql_executor.rb +81 -0
  35. data/lib/sql_chatbot/version.rb +5 -0
  36. data/lib/sql_chatbot_rails.rb +91 -0
  37. data/vendor/assets/widget.js +53 -0
  38. metadata +180 -0
@@ -0,0 +1,385 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "set"
4
+ require "sql_chatbot/grammar/registry"
5
+
6
+ module SqlChatbot
7
+ module Services
8
+ class RegistryBuilder
9
+ SOFT_DELETE_COLS = %w[deleted_at discarded_at archived_at removed_at].freeze
10
+
11
# Store the optional developer configuration used while building.
#
# @param default_filters [Hash, nil] implicit-filter config; {} is stored when falsy
# @param custom_aliases [Hash, nil] alias => entity overrides; {} is stored when falsy
def initialize(default_filters: nil, custom_aliases: nil)
  @default_filters = default_filters ? default_filters : {}
  @custom_aliases = custom_aliases ? custom_aliases : {}
end
15
+
16
# Assemble the grammar registry for the host application.
#
# Each model returned by +discover_models+ becomes an entity keyed by its
# underscored class name. Auto-generated aliases are computed first, then
# domain synonyms and finally developer-supplied aliases are applied.
#
# @return [Grammar::Registry]
def build
  entities = {}
  discover_models.each do |model|
    # Anonymous classes report a nil name, so there is nothing to derive
    # an entity key from. Skip them rather than raising NoMethodError on
    # nil.underscore and aborting the whole build.
    model_name = model.name
    next if model_name.nil?

    entity_name = model_name.underscore
    entities[entity_name] = build_entity(model, entity_name)
  end
  registry = Grammar::Registry.new(
    framework: "rails",
    entities: entities,
    aliases: build_entity_aliases(entities),
  )
  apply_domain_synonyms!(registry)
  apply_custom_aliases!(registry)
  registry
end
31
+
32
# Conservative domain-synonym aliasing. Mirror of TS applyDomainSynonyms
# in registry-loader.ts. For each synonym group, only register the rest
# of the group as aliases when exactly ONE entity claims a group member
# (via canonical name OR an existing alias) — avoids the "customer and
# user are both real entities" ambiguity.
#
# Every group is frozen individually so the constant cannot be mutated
# through one of its inner arrays.
SYNONYM_GROUPS = [
  %w[ticket tickets conversation conversations chat chats].freeze,
  %w[order orders purchase purchases transaction transactions].freeze,
  %w[post posts article articles entry entries].freeze,
  # Code repositories — does NOT include project/projects. Apps with
  # both `repository` and `project` tables (Gitea) would split the
  # group's claim and the entire group would skip via the single-
  # claimer rule. Code review on V1.3-R caught this.
  %w[repo repos repository repositories].freeze,
  %w[issue issues bug bugs task tasks].freeze,
  # Messaging / communication channels — Chatwoot uses `inbox`,
  # Slack-style apps use `channel`. Single-claimer rule handles
  # apps that have only one of the two as a real entity.
  %w[channel channels inbox inboxes].freeze,
].freeze
52
+
53
# Register synonym-group members as aliases on the registry.
#
# A group is applied only when its members resolve (as entity names or
# through existing aliases) to exactly one entity. Members that are
# already an entity name or an alias key are left untouched.
#
# @param registry [#entities, #aliases] registry whose aliases hash is mutated
def apply_domain_synonyms!(registry)
  SYNONYM_GROUPS.each do |group|
    terms = group.map(&:downcase)

    # Collect every distinct entity this group already points at.
    owners = terms.each_with_object(Set.new) do |term, found|
      if registry.entities.key?(term)
        found << term
      elsif (aliased_to = registry.aliases[term])
        found << aliased_to
      end
    end
    next if owners.size != 1

    owner = owners.first
    terms.each do |term|
      next if registry.entities.key?(term) || registry.aliases.key?(term)

      registry.aliases[term] = owner
    end
  end
end
75
+
76
# Layer developer-supplied aliases on top of the auto-detected ones.
#
# A custom mapping replaces any existing alias with the same key. An entry
# is ignored when, after normalisation (alias: stringified, downcased,
# stripped; target: stringified, stripped), either side is blank, the
# target is not an entity in the registry, or the alias term is itself
# the name of a real entity.
#
# @param registry [#entities, #aliases] registry whose aliases hash is mutated
def apply_custom_aliases!(registry)
  return if @custom_aliases.nil? || @custom_aliases.empty?

  @custom_aliases.each do |raw_alias, raw_target|
    term = raw_alias.to_s.downcase.strip
    entity = raw_target.to_s.strip
    usable = !term.empty? && !entity.empty? &&
             registry.entities.key?(entity) && !registry.entities.key?(term)
    registry.aliases[term] = entity if usable
  end
end
91
+
92
# Derive question-phrasing aliases for every canonical entity name.
#
# For each entity three variants are considered:
#   spaced form          "service_area" → "service area"
#   spaced plural        "service_area" → "service areas"
#   snake_case plural    "service_area" → "service_areas"
# A variant is dropped when it equals the entity name, is itself a real
# entity name, or was already claimed by an earlier entity.
#
# Entities are visited from fewest to most underscore-separated segments
# (ties broken alphabetically) so that a simple name such as `user` can
# claim its own plural before a compound name such as `account_user`
# gets a chance to.
#
# @param entities [Hash] entity name => entity
# @return [Hash{String => String}] alias => canonical entity name
def build_entity_aliases(entities)
  by_simplicity = entities.keys.sort_by { |key| [key.split("_").length, key] }

  by_simplicity.each_with_object({}) do |canonical, table|
    with_spaces = canonical.tr("_", " ")
    variants = [with_spaces, pluralize_simple(with_spaces), pluralize_simple(canonical)]
    variants.each do |variant|
      next if variant == canonical || entities.key?(variant) || table.key?(variant)

      table[variant] = canonical
    end
  end
end
119
+
120
# Minimal English pluraliser used for alias generation.
#
#   ends in s / x / ch / sh               → append "es"
#   ends in y not preceded by a, e, i, o, u → replace "y" with "ies"
#   anything else                         → append "s"
#
# @param word [String]
# @return [String] a new string; the argument is not modified
def pluralize_simple(word)
  stem, suffix =
    if word.end_with?("s", "x", "ch", "sh")
      [word, "es"]
    elsif word.end_with?("y") && %w[a e i o u].none?(word[-2])
      [word[0..-2], "ies"]
    else
      [word, "s"]
    end
  stem + suffix
end
125
+
126
+ private
127
+
128
# List the concrete ActiveRecord models that have a backing table.
#
# Returns an empty array when ActiveRecord is not loaded. Otherwise models
# are eager-loaded first, and abstract classes, classes without
# +table_name+, and classes whose table check fails are dropped.
#
# @return [Array<Class>]
def discover_models
  return [] unless defined?(ActiveRecord::Base)

  eager_load_models!
  ActiveRecord::Base.descendants.reject do |klass|
    klass.abstract_class? || !klass.respond_to?(:table_name) || !safe_table_exists?(klass)
  end
end
135
+
136
# Make sure model classes are loaded so they appear among the
# ActiveRecord::Base descendants.
#
# Does nothing when Rails has no application or the application already
# eager-loads. With Zeitwerk, only the configured "app/models" directories
# that exist on disk are eager-loaded; otherwise the whole application is.
# Any error is reported with +warn+ and swallowed.
def eager_load_models!
  return unless defined?(Rails) && Rails.respond_to?(:application)

  app = Rails.application
  return unless app
  return if app.config.eager_load

  zeitwerk_ready = defined?(Zeitwerk) && Rails.autoloaders.respond_to?(:main)
  return app.eager_load! unless zeitwerk_ready

  app.paths["app/models"]&.to_a&.each do |relative|
    absolute = Rails.root.join(relative).to_s
    Rails.autoloaders.main.eager_load_dir(absolute) if Dir.exist?(absolute)
  end
rescue => e
  warn "[SqlChatbot] RegistryBuilder eager_load: #{e.message}"
end
150
+
151
# Ask the model whether its table exists, treating any StandardError
# raised by the check as "no".
#
# @param model [#table_exists?]
# @return [Boolean, Object] the model's answer, or false on error
def safe_table_exists?(model)
  begin
    model.table_exists?
  rescue StandardError
    false
  end
end
156
+
157
# Describe one model as a grammar entity.
#
# Fields and timestamps are computed up front because the implicit
# filters are derived from both of them.
#
# @param model [Class] the model to describe
# @param entity_name [String] registry key for the entity
# @return [Grammar::Entity]
def build_entity(model, entity_name)
  field_map = build_fields(model)
  stamp_map = detect_timestamps(model)

  attributes = {
    name: entity_name,
    table: model.table_name,
    display_label: model.name,
    row_count: safe_row_count(model),
    primary_key: model.primary_key.to_s,
    timestamps: stamp_map,
    fields: field_map,
    scopes: build_scopes(model),
    associations: build_associations(model),
    ranking_candidates: ranking_candidates_for(model),
    implicit_filters: compile_implicit_filters(model.table_name, field_map, stamp_map)
  }
  Grammar::Entity.new(**attributes)
end
174
+
175
# Work out the implicit WHERE filters for one entity. Two sources:
#   1. the detected soft-delete column        → `<col> IS NULL`
#   2. the developer's `default_filters` config, keyed `*.col` (every
#      table) or `table.col` (this table only)
#
# Config entries are skipped when the key has no dot, when the table part,
# column part or expression is blank, when the table part does not match,
# or when `fields` has no key for the column. A config entry replaces any
# filter already collected for the same column, including the soft-delete one.
#
# NOTE(review): `fields` is tested with `key?` only; if the hash also holds
# alias keys, a config entry naming an alias would pass this check — confirm.
#
# @param table [String] table name of the entity
# @param fields [Hash] column name => field
# @param timestamps [Hash] may carry the soft-delete column under :deleted or "deleted"
# @return [Array<Grammar::ImplicitFilter>]
def compile_implicit_filters(table, fields, timestamps)
  filters = []
  soft_delete_col = timestamps[:deleted] || timestamps["deleted"]
  if soft_delete_col
    filters << Grammar::ImplicitFilter.new(column: soft_delete_col, expr: "IS NULL", source: :soft_delete)
  end
  return filters if @default_filters.nil? || @default_filters.empty?

  @default_filters.each do |raw_key, raw_expr|
    # Split on the FIRST dot: "<table or *>.<column>".
    scope, separator, column = raw_key.to_s.partition(".")
    expression = raw_expr.to_s.strip
    next if expression.empty?
    next if separator.empty? || scope.empty? || column.empty?
    next unless scope == "*" || scope == table
    next unless fields.key?(column)

    filters.reject! { |existing| existing.column == column }
    filters << Grammar::ImplicitFilter.new(column: column, expr: expression, source: :default_filter)
  end
  filters
end
208
+
209
# Row count of the model, or 0 when counting raises a StandardError.
#
# @param model [#count]
# @return [Integer, Object] whatever +count+ returns, or 0 on error
def safe_row_count(model)
  begin
    model.count
  rescue StandardError
    0
  end
end
214
+
215
# Find the conventional timestamp columns present on the model.
#
# :created / :updated are set when `created_at` / `updated_at` exist.
# :deleted is set to the first name in SOFT_DELETE_COLS that the model has.
#
# @param model [#columns_hash]
# @return [Hash{Symbol => String}] only the slots that were found
def detect_timestamps(model)
  columns = model.columns_hash
  found = {}
  { created: "created_at", updated: "updated_at" }.each do |slot, column_name|
    found[slot] = column_name if columns.key?(column_name)
  end
  soft_delete = SOFT_DELETE_COLS.find { |column_name| columns.key?(column_name) }
  found[:deleted] = soft_delete if soft_delete
  found
end
224
+
225
# Build the field map for a model: one Grammar::Field per column, plus
# alias keys added afterwards by +add_field_aliases!+.
#
# A column with an entry in the model's +defined_enums+ is typed :enum and
# carries that entry as its enum values; every other column gets its type
# from +map_type+. Only fields whose resulting type is :text are marked
# searchable.
#
# @param model [#defined_enums, #columns_hash]
# @return [Hash{String => Grammar::Field}]
def build_fields(model)
  enum_map = model.defined_enums

  fields = model.columns_hash.each_with_object({}) do |(col, info), acc|
    values = enum_map[col]
    kind = values ? :enum : map_type(info.type)
    acc[col] = Grammar::Field.new(
      column: col,
      type: kind,
      nullable: info.null,
      enum_values: values,
      fk_to: nil,
      user_facing_label: col.humanize,
      searchable: kind == :text
    )
  end

  # Short human phrasings ("rating" for "avg_rating") become extra keys
  # that point at the same Field object.
  add_field_aliases!(fields)
  fields
end
248
+
249
# Register alias keys on the fields hash so lookups like
# fields["rating"] work when the real column is "avg_rating".
# The candidate synonyms for each column come from +synonyms_for+.
#
# An alias is only added if (a) no real column has that name and
# (b) exactly one column proposes it.
#
# Synonyms are de-duplicated per column before counting: a column such as
# `total_count` can propose the same synonym through more than one naming
# rule, and counting those repeats used to make the column look ambiguous
# with itself, so it never received any alias.
#
# @param fields [Hash] column name => field; mutated in place
def add_field_aliases!(fields)
  alias_candidates = Hash.new { |h, k| h[k] = [] }
  fields.each do |col, field|
    synonyms_for(col).uniq.each { |syn| alias_candidates[syn] << field }
  end
  alias_candidates.each do |alias_key, matching_fields|
    next if fields.key?(alias_key)          # don't shadow real column
    next unless matching_fields.size == 1   # skip aliases claimed by several columns
    fields[alias_key] = matching_fields.first
  end
end
269
+
270
# Short synonyms suggested by a column's naming pattern.
#
#   avg_X    → X              X_avg   → X
#   X_count  → "count", X     num_X   → X
#   total_X  → "total", X     X_total → "total"
#
# A column can match several patterns (`total_count` matches both
# `X_count` and `total_X`), so the result is de-duplicated; first
# occurrence order is kept.
#
# @param col [String] column name
# @return [Array<String>] distinct synonyms, possibly empty
def synonyms_for(col)
  syns = []
  syns << Regexp.last_match(1) if col =~ /\Aavg_(.+)\z/
  syns << Regexp.last_match(1) if col =~ /\A(.+)_avg\z/
  syns.push("count", Regexp.last_match(1)) if col =~ /\A(.+)_count\z/
  syns.push("total", Regexp.last_match(1)) if col =~ /\Atotal_(.+)\z/
  syns << "total" if col =~ /\A(.+)_total\z/
  syns << Regexp.last_match(1) if col =~ /\Anum_(.+)\z/
  syns.uniq
end
280
+
281
# Discover scope-like class methods on a model by calling them with no
# arguments and keeping those that return an ActiveRecord::Relation.
#
# Because discovery INVOKES the methods, candidates are filtered first:
#   * names generated for enums are ignored,
#   * names ending in "!" or "=" are never called (mutators by convention),
#   * methods that need at least one argument are never called (they could
#     only have raised ArgumentError).
# NOTE(review): any remaining zero-argument class method is still executed;
# a method with side effects that follows none of these conventions will run.
#
# The WHERE clause is cut out of the relation's SQL up to ORDER BY, LIMIT
# or end of line; it is "" when the SQL has no WHERE.
#
# @param model [Class]
# @return [Hash{String => Grammar::Scope}] {} when extraction fails as a whole
def build_scopes(model)
  scopes = {}
  enum_generated = enum_generated_scope_names(model)

  model.singleton_methods(false).each do |method_name|
    next if enum_generated.include?(method_name)

    begin
      next unless safe_to_probe?(model, method_name)

      relation = model.send(method_name)
      next unless relation.is_a?(ActiveRecord::Relation)
      sql = relation.to_sql
      where_match = sql.match(/WHERE\s+(.+?)(?:\s+ORDER\s+BY|\s+LIMIT|\s*$)/i)
      where_clause = where_match ? where_match[1].strip : ""

      scopes[method_name.to_s] = Grammar::Scope.new(
        name: method_name.to_s,
        where_clause: where_clause,
        param_slots: []
      )
    rescue
      # skip scopes that raise (e.g. reference missing columns)
    end
  end

  scopes
rescue => e
  warn "[SqlChatbot] scope extraction for #{model}: #{e.message}"
  {}
end

# True when calling +method_name+ on +model+ with no arguments is
# acceptable for scope discovery: not a bang/setter name, and no
# mandatory parameters (arity 0 or -1).
def safe_to_probe?(model, method_name)
  return false if method_name.to_s.end_with?("!", "=")

  arity = model.method(method_name).arity
  arity.zero? || arity == -1
end
310
+
311
# Returns a Set of method names that AR auto-generates for enum columns
# (e.g. :active, :not_active, :banned, :not_banned, :statuses).
#
# The class-level accessor is named after the PLURALIZED attribute, so the
# inflector is used when String#pluralize is available ("status" →
# :statuses). Appending a bare "s" is only the fallback for when it is not.
#
# @param model [#defined_enums]
# @return [Set<Symbol>] empty when the enums cannot be read
def enum_generated_scope_names(model)
  generated = Set.new
  model.defined_enums.each do |col, values|
    values.each_key do |v|
      generated << v.to_sym
      generated << :"not_#{v}"
    end
    # AR adds a pluralized class accessor (e.g. User.statuses)
    attribute = col.to_s
    accessor = attribute.respond_to?(:pluralize) ? attribute.pluralize : "#{attribute}s"
    generated << accessor.to_sym
  end
  generated
rescue
  Set.new
end
327
+
328
# Describe every association reflection of the model.
#
# A reflection is skipped when it has no target class or when anything
# raises while it is being described (e.g. the target model is missing).
#
# @param model [#reflect_on_all_associations]
# @return [Hash{String => Grammar::Association}] keyed by association name
def build_associations(model)
  described = {}
  model.reflect_on_all_associations.each do |refl|
    begin
      next unless refl.klass

      target = refl.klass.name.underscore
      assoc_name = refl.name.to_s
      described[assoc_name] = Grammar::Association.new(
        name: refl.name.to_s,
        kind: refl.macro,
        target_entity: target,
        join_clause: join_clause_for(refl),
        through_entity: refl.options[:through]&.to_s
      )
    rescue
      # skip associations pointing to missing models
    end
  end
  described
end
345
+
346
# Build the SQL join condition between the owning table and the target
# table of an association reflection.
#
#   belongs_to        owner.<foreign_key> = target.<association primary key>
#   has_many/has_one  owner.<owner key>   = target.<foreign_key>
#   anything else     "" (no clause)
#
# For has_many/has_one the owner key is the association's `primary_key:`
# option when one is declared, and the model's primary key otherwise.
# Always using the model's primary key produced a wrong join for
# associations keyed on another column.
#
# @param refl [Object] association reflection
# @return [String]
def join_clause_for(refl)
  owner_table = refl.active_record.table_name
  target_table = refl.klass.table_name
  case refl.macro
  when :belongs_to
    "#{owner_table}.#{refl.foreign_key} = #{target_table}.#{refl.association_primary_key}"
  when :has_many, :has_one
    owner_key = refl.options[:primary_key] || refl.active_record.primary_key
    "#{owner_table}.#{owner_key} = #{target_table}.#{refl.foreign_key}"
  else
    ""
  end
end
358
+
359
# Columns that can be used to rank rows ("biggest / smallest X").
#
# Only integer, decimal, float and datetime columns qualify. Metric-like
# columns come first; the primary key, `id` and `*_id` columns are moved
# to the end so they are not the default ranking choice. Column order is
# otherwise preserved within each group.
#
# @param model [#columns_hash, #primary_key]
# @return [Array<String>]
def ranking_candidates_for(model)
  rankable_types = [:integer, :decimal, :float, :datetime]
  rankable = model.columns_hash.each_with_object([]) do |(name, column), names|
    names << name if rankable_types.include?(column.type)
  end

  pk = model.primary_key.to_s
  key_like, metric_like = rankable.partition do |name|
    name == pk || name == "id" || name.end_with?("_id")
  end
  metric_like + key_like
end
370
+
371
# Translate an ActiveRecord column type into the grammar's field type.
# Types that are not listed fall back to :text.
#
# @param ar_type [Symbol]
# @return [Symbol] one of :int, :text, :bool, :timestamp, :decimal, :jsonb, :uuid
def map_type(ar_type)
  {
    integer: :int, bigint: :int,
    string: :text, text: :text,
    boolean: :bool,
    datetime: :timestamp, date: :timestamp, time: :timestamp,
    decimal: :decimal, float: :decimal,
    json: :jsonb, jsonb: :jsonb,
    uuid: :uuid
  }.fetch(ar_type, :text)
end
383
+ end
384
+ end
385
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SqlChatbot
4
+ module Services
5
+ class RouteIntrospector
6
+ INTERNAL_CONTROLLERS = %w[
7
+ active_storage/ action_mailbox/ action_cable/ rails/
8
+ sql_chatbot/
9
+ ].freeze
10
+
11
# Collect the application's own routes as plain hashes.
#
# Routes flagged internal, routes served by framework controllers, and
# routes whose normalised path is blank are left out. Returns [] when
# Rails has no application, or when anything raises (a warning is printed).
#
# @return [Array<Hash>] hashes with :path, :method, :label and :parentPath
def introspect
  return [] unless defined?(Rails) && Rails.application

  Rails.application.routes.routes.each_with_object([]) do |route, pages|
    next if route.internal || internal_controller?(route)

    path = normalize_path(route)
    next if path.nil? || path.empty?

    pages << {
      path: path,
      method: extract_method(route),
      label: derive_label(route),
      parentPath: derive_parent(path),
    }
  end
rescue => e
  warn "[SqlChatbot] RouteIntrospector: Could not introspect routes: #{e.message}"
  []
end
32
+
33
# Render the GET routes as a markdown bullet list under a heading.
#
# Each bullet is "- <path> — <label>", followed by " (under <parent>)"
# when the route has a parent path. A fixed message is returned instead
# when no routes were found at all.
#
# @return [String]
def format_route_list
  routes = introspect
  return "No application routes detected." if routes.empty?

  bullets = []
  routes.each do |entry|
    next unless entry[:method] == "GET"

    suffix = entry[:parentPath] ? " (under #{entry[:parentPath]})" : ""
    bullets << "- #{entry[:path]} \u2014 #{entry[:label]}#{suffix}"
  end

  "## Available Application Pages\n#{bullets.join("\n")}"
end
44
+
45
+ private
46
+
47
# Whether the route's controller path starts with one of the
# INTERNAL_CONTROLLERS prefixes. A route without a controller is not
# internal.
#
# @param route [#defaults]
# @return [Boolean]
def internal_controller?(route)
  controller_path = route.defaults[:controller].to_s
  controller_path.start_with?(*INTERNAL_CONTROLLERS)
end
51
+
52
# Path pattern of the route as a string, without the optional format
# suffix: the literal "(.:format)" is removed, then any trailing
# "(.<something>)" group. A pattern that ends up empty becomes "/".
#
# @param route [#path] route whose path exposes +spec+
# @return [String]
def normalize_path(route)
  cleaned = route.path.spec.to_s.sub("(.:format)", "").sub(/\(\..+\)$/, "")
  cleaned.empty? ? "/" : cleaned
end
59
+
60
# HTTP verb of a route as a single name such as "GET".
#
# The route's verb may be a Regexp (anchors are stripped from its source)
# or a String. Either form can list several verbs separated by "|"; the
# first one is returned in both cases. The String form used to be returned
# whole ("GET|POST"), which never compared equal to "GET". A blank or
# missing verb is reported as "GET".
#
# @param route [#verb]
# @return [String]
def extract_method(route)
  verb = route.verb
  verbs =
    case verb
    when Regexp then verb.source.gsub(/[\^$]/, "")
    when String then verb
    else ""
    end
  verbs.split("|").first || "GET"
end
72
+
73
# Human-readable label for a route, built from its controller and action.
#
# The last segment of the controller path is title-cased ("service_areas"
# → "Service Areas"). Standard actions get a fixed phrasing around the
# plural or singular title; any other action is title-cased and appended
# to the plural title.
#
# @param route [#defaults]
# @return [String]
def derive_label(route)
  controller = route.defaults[:controller].to_s
  action = route.defaults[:action].to_s
  title = controller.split("/").last.to_s.split("_").map(&:capitalize).join(" ")
  single = singularize(title)

  if action == "index"
    title
  elsif action == "show"
    "#{single} Detail"
  elsif %w[new create].include?(action)
    "New #{single}"
  elsif %w[edit update].include?(action)
    "Edit #{single}"
  elsif action == "destroy"
    "Delete #{single}"
  else
    action_words = action.split("_").map(&:capitalize).join(" ")
    "#{title} #{action_words}"
  end
end
95
+
96
# Parent path of a route path: the path without its last segment and
# without any dynamic (":param") segments left at the end after that.
#
#   "/users/:id/edit" → "/users"
#   "/users"          → nil (single segment)
#   "/:id/x"          → nil (nothing static remains)
#
# @param path [String]
# @return [String, nil]
def derive_parent(path)
  parts = path.split("/").reject(&:empty?)
  return nil if parts.length <= 1

  ancestors = parts[0...-1]
  last_static = ancestors.rindex { |segment| !segment.start_with?(":") }
  return nil if last_static.nil?

  "/#{ancestors[0..last_static].join("/")}"
end
104
+
105
# Singular form of a (title-cased) plural word, used for route labels.
#
# When ActiveSupport's inflector is loaded it is used, because the suffix
# rules below mangle common words ("Houses" → "Hous", "Status" → "Statu").
# Without it the original heuristic applies:
#   "...ies" → "...y"; "...ses" → drop "es"; "...s" (not "...ss") → drop "s".
#
# @param word [String]
# @return [String]
def singularize(word)
  return ActiveSupport::Inflector.singularize(word) if defined?(ActiveSupport::Inflector)

  if word.end_with?("ies")
    word[0...-3] + "y"
  elsif word.end_with?("ses")
    word[0...-2]
  elsif word.end_with?("s") && !word.end_with?("ss")
    word[0...-1]
  else
    word
  end
end
116
+ end
117
+ end
118
+ end