sql-chatbot-rails 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +20 -0
- data/app/controllers/sql_chatbot/chatbot_controller.rb +158 -0
- data/config/routes.rb +11 -0
- data/lib/generators/sql_chatbot/install_generator.rb +25 -0
- data/lib/generators/sql_chatbot/templates/initializer.rb +22 -0
- data/lib/sql_chatbot/auth/cors.rb +35 -0
- data/lib/sql_chatbot/auth/jwt.rb +34 -0
- data/lib/sql_chatbot/configuration.rb +58 -0
- data/lib/sql_chatbot/engine.rb +23 -0
- data/lib/sql_chatbot/grammar/count_renderer.rb +113 -0
- data/lib/sql_chatbot/grammar/entity_candidates.rb +210 -0
- data/lib/sql_chatbot/grammar/intent_extractor.rb +191 -0
- data/lib/sql_chatbot/grammar/list_renderer.rb +50 -0
- data/lib/sql_chatbot/grammar/miss_logger.rb +17 -0
- data/lib/sql_chatbot/grammar/modifiers.rb +145 -0
- data/lib/sql_chatbot/grammar/primitives.rb +69 -0
- data/lib/sql_chatbot/grammar/programmatic_renderer.rb +258 -0
- data/lib/sql_chatbot/grammar/registry.rb +66 -0
- data/lib/sql_chatbot/grammar/sanity_check.rb +37 -0
- data/lib/sql_chatbot/grammar/template_compiler.rb +179 -0
- data/lib/sql_chatbot/llm/client.rb +87 -0
- data/lib/sql_chatbot/prompts/answer.rb +157 -0
- data/lib/sql_chatbot/prompts/classify.rb +59 -0
- data/lib/sql_chatbot/prompts/generate_sql.rb +88 -0
- data/lib/sql_chatbot/services/code_indexer.rb +337 -0
- data/lib/sql_chatbot/services/grammar_pipeline.rb +45 -0
- data/lib/sql_chatbot/services/model_introspector.rb +152 -0
- data/lib/sql_chatbot/services/orchestrator.rb +635 -0
- data/lib/sql_chatbot/services/registry_builder.rb +385 -0
- data/lib/sql_chatbot/services/route_introspector.rb +118 -0
- data/lib/sql_chatbot/services/schema_service.rb +884 -0
- data/lib/sql_chatbot/services/sql_executor.rb +81 -0
- data/lib/sql_chatbot/version.rb +5 -0
- data/lib/sql_chatbot_rails.rb +91 -0
- data/vendor/assets/widget.js +53 -0
- metadata +180 -0
|
@@ -0,0 +1,884 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SqlChatbot
|
|
4
|
+
module Services
|
|
5
|
+
class SchemaService
|
|
6
|
+
# Columns that indicate soft-delete patterns
SOFT_DELETE_COLUMNS = %w[deleted_at discarded_at archived_at removed_at].freeze

# Word-boundary patterns for sensitive columns — must match as whole "word segments"
# separated by underscores or string boundaries, to avoid false positives like
# "pinned_at" matching "pin".
# Consumed by .sensitive?, which wraps each pattern in (?:^|_)...(?:$|_).
SENSITIVE_PATTERNS = %w[
  password passwd secret token ssn social_security
  credit_card card_number cvv pin encrypted hash
  salt private_key api_key auth_key access_key
].freeze

# Maps PostgreSQL data_type strings to concise labels used in the schema summary.
# Unknown types fall back to the upcased raw name (see .map_type).
TYPE_MAP = {
  "character varying" => "VARCHAR",
  "integer" => "INT",
  "bigint" => "BIGINT",
  "smallint" => "SMALLINT",
  "timestamp without time zone" => "TIMESTAMP",
  "timestamp with time zone" => "TIMESTAMPTZ",
  "numeric" => "DECIMAL",
  "boolean" => "BOOL",
  "text" => "TEXT",
  "date" => "DATE",
  "double precision" => "DOUBLE",
  "real" => "REAL",
  "uuid" => "UUID",
  "jsonb" => "JSONB",
  "json" => "JSON",
}.freeze
|
|
36
|
+
|
|
37
|
+
# -------------------------------------------------------------------
|
|
38
|
+
# Class-level helpers
|
|
39
|
+
# -------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
# Returns true if the column name matches any sensitive pattern using
|
|
42
|
+
# word-boundary matching: pattern must appear between start-of-string /
|
|
43
|
+
# underscore boundaries. This avoids false positives like "pinned_at"
|
|
44
|
+
# matching "pin".
|
|
45
|
+
# Returns true if the column name matches any sensitive pattern using
# word-boundary matching: a pattern must sit between start/end-of-string or
# underscore boundaries, so e.g. "pinned_at" does NOT match "pin".
def self.sensitive?(column_name)
  name = column_name.downcase
  SENSITIVE_PATTERNS.any? { |pattern| /(?:^|_)#{Regexp.escape(pattern)}(?:$|_)/.match?(name) }
end
|
|
51
|
+
|
|
52
|
+
# Map a PostgreSQL data_type to a concise label; unknown types are uppercased.
|
|
53
|
+
# Map a PostgreSQL data_type to a concise label; unknown types are uppercased.
def self.map_type(pg_type)
  TYPE_MAP.fetch(pg_type) { pg_type.upcase }
end
|
|
56
|
+
|
|
57
|
+
# -------------------------------------------------------------------
|
|
58
|
+
# Instance
|
|
59
|
+
# -------------------------------------------------------------------
|
|
60
|
+
|
|
61
|
+
# NOTE: a redundant `attr_reader :table_count` was removed here — the explicit
# `table_count` method defined later in this class (returning @tables.length)
# always overrode the generated reader, so it was dead code.
|
|
62
|
+
|
|
63
|
+
# Start with an empty schema model; #discover populates all of these.
def initialize
  @tables = []
  @summary_text = ""
  @per_table_schemas = {}
  @fk_graph = {}
  @table_index = {}
end
|
|
70
|
+
|
|
71
|
+
# The full schema summary string built by #discover ("" before discovery).
def summary
  @summary_text
end
|
|
74
|
+
|
|
75
|
+
# Number of tables discovered so far (0 before #discover runs).
def table_count
  @tables.size
end
|
|
78
|
+
|
|
79
|
+
# Scan FK LOOKUP and RAILS ENUM annotations for values that match words in the question.
|
|
80
|
+
# Returns array of hint strings like:
|
|
81
|
+
# "The user mentions 'movies'. In the titles table, use WHERE category_id = 2 (Movie)."
|
|
82
|
+
# "The user mentions 'active'. In the contractors table, use WHERE status = 1 (Active)."
|
|
83
|
+
# Scan FK LOOKUP and RAILS ENUM annotations for values that match words in the question.
# Returns array of hint strings like:
#   "The user mentions 'movies'. In the titles table, use WHERE category_id = 2 (Movie)."
#   "The user mentions 'active'. In the contractors table, use WHERE status = 1 (Active)."
# The two annotation kinds only differ in pair orientation (FK LOOKUP pairs are
# "id=Name", RAILS ENUM pairs are "Label=num"), so the matching logic is shared
# via collect_value_hints below.
def find_lookup_hints(question)
  return [] if @summary_text.empty?

  # Filter out stop words that would match too broadly
  stop_words = Set.new(%w[a an the is are was were be been being have has had do does did will would shall should may might can could how what when where who which why not and or but if then else for from by with at in on to of it its this that these those])
  words = question.downcase.split(/\W+/).reject { |w| w.empty? || w.length < 2 || stop_words.include?(w) }
  hints = []
  current_table = nil

  @summary_text.split("\n").each do |line|
    if line.start_with?("TABLE ")
      current_table = line.match(/^TABLE (\S+)/)[1]
    elsif line.include?("FK LOOKUP:") && current_table
      match = line.match(/FK LOOKUP:\s+(\S+).*?values:\s+(.+)/)
      next unless match

      # FK LOOKUP pairs are "id=Name" — the human label is the second element.
      hints.concat(collect_value_hints(words, current_table, match[1], match[2], label_first: false))
    elsif line.include?("RAILS ENUM:") && current_table
      match = line.match(/RAILS ENUM:\s+(\S+)\s+values:\s+(.+)/)
      next unless match

      # RAILS ENUM pairs are "Label=num" — the human label is the first element.
      hints.concat(collect_value_hints(words, current_table, match[1], match[2], label_first: true))
    end
  end

  hints.uniq.first(15) # Cap at 15 hints to avoid drowning the LLM
end

# Shared matcher for FK LOOKUP / RAILS ENUM annotation pairs.
# words       - pre-filtered question words
# table       - table the annotation belongs to
# column      - column named in the annotation
# pairs_str   - comma-separated "a=b" pairs from the annotation
# label_first - true when the human label is the left side of each pair
# Returns an array of hint strings for pairs whose label fuzzily matches a word.
def collect_value_hints(words, table, column, pairs_str, label_first:)
  hints = []
  pairs_str.split(",").map(&:strip).each do |pair|
    left, right = pair.split("=", 2)
    label, value = label_first ? [left, right] : [right, left]
    next unless label && value

    clean = label.strip
    next if clean.empty? || clean.length < 2 # Skip empty/tiny labels

    lc = clean.downcase
    label_words = lc.split(/\W+/).reject(&:empty?)
    matched_word = words.find do |w|
      label_words.include?(w) ||
        lc == w ||
        (clean.length >= 3 && lc.start_with?(w)) ||
        (w.length >= 3 && w.start_with?(lc))
    end
    if matched_word
      hints << "The user mentions \"#{matched_word}\". In the #{table} table, use WHERE #{column} = #{value.strip} (#{clean})."
    end
  end
  hints
end
|
|
146
|
+
|
|
147
|
+
# Extract RAILS ENUM annotations from a schema string for the answer prompt.
|
|
148
|
+
# Returns a string like:
|
|
149
|
+
# "contractors.status: Active=1, Inactive=2, Deleted=3\njobs.status: Active=1, ..."
|
|
150
|
+
# Extract RAILS ENUM annotations from a schema string for the answer prompt.
# Returns a string like:
#   "contractors.status: Active=1, Inactive=2, Deleted=3\njobs.status: Active=1, ..."
# Defaults to the full @summary_text when no schema_text is given.
def extract_enum_context(schema_text = nil)
  source = schema_text || @summary_text
  return "" if source.empty?

  enum_lines = []
  table = nil

  source.split("\n").each do |line|
    if line.start_with?("TABLE ")
      table = line.match(/^TABLE (\S+)/)[1]
    elsif table && line.include?("RAILS ENUM:")
      m = line.match(/RAILS ENUM:\s+(\S+)\s+values:\s+(.+)/)
      enum_lines << "#{table}.#{m[1]}: #{m[2]}" if m
    end
  end

  enum_lines.join("\n")
end
|
|
169
|
+
|
|
170
|
+
# Returns a short string listing all known table names.
|
|
171
|
+
# Used by the classify prompt so the LLM can see available tables without
|
|
172
|
+
# the full schema (~500 tokens vs ~275K chars for the full schema).
|
|
173
|
+
# Returns a short string listing all known table names ("" before discovery).
# Used by the classify prompt so the LLM can see available tables without
# the full schema (~500 tokens vs ~275K chars for the full schema).
def table_names
  @tables.empty? ? "" : "Available tables: #{@tables.join(', ')}"
end
|
|
178
|
+
|
|
179
|
+
# Given an array of search terms (e.g. ["customers"] or ["jobs", "job_types"]),
|
|
180
|
+
# returns a schema string containing ONLY the matching tables plus any tables
|
|
181
|
+
# needed to join them (bridge tables via FK paths, max depth 2).
|
|
182
|
+
#
|
|
183
|
+
# Falls back to hub tables (top 10 by FK edge count) when no terms match.
|
|
184
|
+
# Given an array of search terms (e.g. ["customers"] or ["jobs", "job_types"]),
# returns a schema string containing ONLY the matching tables plus any tables
# needed to join them (bridge tables via FK paths, max depth 2).
#
# Falls back to hub tables (top 10 by FK edge count) when no terms match.
# Before #discover has run, returns the full summary unchanged.
def select_schema(terms)
  # No per-table index yet — nothing to narrow down.
  return @summary_text if @per_table_schemas.empty?

  # Score tables by relevance; fall back to hub tables on zero matches.
  relevance = score_tables(terms)
  hub_tables(8).each { |t| relevance[t] = 1 } if relevance.empty?

  # Keep the top 8 by score.
  primary = relevance.sort_by { |_, score| -score }.first(8).map(&:first)

  # Add bridge tables needed to join any pair of primary tables.
  selected = Set.new(primary)
  primary.combination(2).each do |a, b|
    path = find_join_path(a, b)
    selected.merge(path) if path
  end

  # Cap the total at 12, preferring primary tables over bridges.
  cap = 12
  if selected.size > cap
    ordered = primary + selected.to_a.reject { |t| primary.include?(t) }
    selected = Set.new(ordered.first(cap))
  end

  # Emit per-table chunks in original discovery order.
  @tables.select { |t| selected.include?(t) }
         .map { |t| @per_table_schemas[t] }
         .compact
         .join("\n")
end
|
|
221
|
+
|
|
222
|
+
# Inject model-level annotations (from ModelIntrospector) into the schema summary.
|
|
223
|
+
# annotations_by_table: Hash of table_name => [annotation_strings]
|
|
224
|
+
# Each annotation is inserted after the TABLE line and any existing annotations.
|
|
225
|
+
# Inject model-level annotations (from ModelIntrospector) into the schema summary.
# annotations_by_table: Hash of table_name => [annotation_strings]
# Each table's annotations are appended at the end of that table's section
# (just before the next TABLE line, or at end of text for the last table).
# Also mirrors the annotations into @per_table_schemas so select_schema sees them.
def append_model_annotations(annotations_by_table)
  return if annotations_by_table.nil? || annotations_by_table.empty?

  rebuilt = []
  table = nil
  # Appends any pending annotations for the table we just finished walking.
  flush = lambda do
    rebuilt.concat(annotations_by_table[table]) if table && annotations_by_table.key?(table)
  end

  @summary_text.split("\n").each do |line|
    if line.start_with?("TABLE ")
      flush.call
      table = line.match(/^TABLE (\S+)/)[1]
    end
    rebuilt << line
  end
  flush.call # last table's annotations

  @summary_text = rebuilt.join("\n")

  # Keep per-table chunks in sync for select_schema().
  annotations_by_table.each do |tbl, anns|
    next unless @per_table_schemas.key?(tbl)

    @per_table_schemas[tbl] = ([@per_table_schemas[tbl]] + anns).join("\n")
  end
end
|
|
258
|
+
|
|
259
|
+
# Introspect the database and build a schema summary string with enrichment
|
|
260
|
+
# annotations (soft delete, polymorphic, lookup values, enums, check constraints).
|
|
261
|
+
# Requires ActiveRecord::Base.connection to be available.
|
|
262
|
+
# Introspect the database and build a schema summary string with enrichment
# annotations (soft delete, polymorphic, lookup values, enums, check constraints).
# Requires ActiveRecord::Base.connection to be available.
# Side effects: populates @tables, @summary_text, @per_table_schemas,
# @table_index, @fk_graph and @deferred_soft_deletes.
def discover
  conn = ActiveRecord::Base.connection

  # Reset deferred soft-delete bookkeeping so a refresh does not carry over
  # (and duplicate) entries from a previous discovery run.
  @deferred_soft_deletes = {}

  # Run all introspection queries
  table_names = query_tables(conn)
  columns_rows = query_columns(conn)
  pk_rows = query_primary_keys(conn)
  fk_rows = query_foreign_keys(conn)
  enum_rows = query_enums(conn)
  check_rows = query_check_constraints(conn)

  # Index primary keys: Set of "table.column"
  pk_set = Set.new(pk_rows.map { |r| "#{r['table_name']}.#{r['column_name']}" })

  # Index foreign keys: Hash of "from_table.from_column" => "to_table.to_column"
  fk_map = fk_rows.each_with_object({}) do |r, h|
    h["#{r['from_table']}.#{r['from_column']}"] = "#{r['to_table']}.#{r['to_column']}"
  end

  # Index enum types: enum_name => [ordered values]
  enum_map = enum_rows.each_with_object({}) do |r, h|
    (h[r["enum_name"]] ||= []) << r["enum_value"]
  end

  # Parse check constraints for IN (...) or ANY(ARRAY[...]) patterns
  check_enum_map = {}
  check_rows.each do |r|
    result = parse_check_constraint(r["check_def"])
    next unless result

    col_name, values = result
    check_enum_map["#{r['table_name']}.#{col_name}"] = values
  end

  # Group columns by table
  columns_by_table = columns_rows.each_with_object({}) do |col, h|
    (h[col["table_name"]] ||= []) << col
  end

  # Collect FK target tables for lookup value detection
  fk_target_tables = Set.new(fk_rows.map { |r| r["to_table"] })

  # Convention-based references: *_id columns => plural table names
  table_name_set = Set.new(table_names)
  columns_by_table.each_value do |cols|
    cols.each do |col|
      col_name = col["column_name"]
      next unless col_name.end_with?("_id")
      next if pk_set.include?("#{col['table_name']}.#{col_name}")

      base = col_name[0..-4] # remove '_id'
      candidates = [
        "#{base}s",
        "#{base.sub(/y$/, 'ie')}s",
        "#{base}es",
        base,
      ]
      candidates.each do |candidate|
        if table_name_set.include?(candidate)
          fk_target_tables.add(candidate)
          break
        end
      end
    end
  end

  # Discover lookup values for small referenced tables
  lookup_values = discover_lookup_values(conn, fk_target_tables, columns_by_table, pk_set)

  # Get approximate row counts for all tables (helps LLM distinguish data vs config tables)
  row_counts = query_row_counts(conn)

  # Build summary lines
  lines = []
  table_names.each do |table|
    columns = columns_by_table[table] || []
    col_parts = []
    annotations = []
    col_name_types = {} # column_name => mapped_type (for polymorphic detection)

    columns.each do |col|
      next if self.class.sensitive?(col["column_name"])

      key = "#{table}.#{col['column_name']}"

      # Resolve enum values: PG native enum or check-constraint enum
      enum_values = if col["data_type"] == "USER-DEFINED" && col["udt_name"]
        enum_map[col["udt_name"]]
      end
      enum_values ||= check_enum_map[key]

      mapped_type = if enum_values
        "ENUM(#{enum_values.join(',')})"
      else
        self.class.map_type(col["data_type"])
      end

      part = "#{col['column_name']} #{mapped_type}"
      part += " PK" if pk_set.include?(key)
      part += " FK=>#{fk_map[key]}" if fk_map.key?(key)

      col_parts << part
      col_name_types[col["column_name"]] = mapped_type

      # Defer soft delete annotation (applied after model introspection)
      if SOFT_DELETE_COLUMNS.include?(col["column_name"])
        (@deferred_soft_deletes[table] ||= []) << col["column_name"]
      end

      # Enum value annotation
      if enum_values
        annotations << " -- ENUM: #{col['column_name']} values: #{enum_values.join(', ')}"
      end
    end

    # Polymorphic association detection
    col_name_types.each do |col_name, col_type|
      next unless col_name.end_with?("_type") && %w[VARCHAR TEXT].include?(col_type)

      prefix = col_name[0..-6] # remove '_type'
      id_col = "#{prefix}_id"
      id_type = col_name_types[id_col]
      if id_type && %w[INT BIGINT].include?(id_type)
        annotations << " -- POLYMORPHIC: #{col_name} + #{id_col} (join target depends on type value)"
      end
    end

    # Lookup values annotation
    if lookup_values.key?(table)
      annotations << " -- VALUES: #{lookup_values[table]}"
    end

    count = row_counts[table]
    count_hint = count ? " (~#{count} rows)" : ""
    lines << "TABLE #{table}#{count_hint} (#{col_parts.join(', ')})"
    annotations.each { |ann| lines << ann }
  end

  @tables = table_names
  @summary_text = lines.join("\n")

  build_per_table_schemas(lines)
  build_table_index(columns_by_table)
  build_fk_graph(fk_rows, columns_by_table, table_name_set)
end
|
|
408
|
+
|
|
409
|
+
# Re-discover schema (alias for discover)
|
|
410
|
+
# Re-discover schema (alias for discover). Rebuilds @summary_text and all
# derived indexes from the current database state.
def refresh
  discover
end
|
|
413
|
+
|
|
414
|
+
# Move "-- VALUES:" annotations from lookup tables to the FK columns that reference them.
|
|
415
|
+
# After this, LLMs see lookup values next to the FK column (e.g., category_id) instead of
|
|
416
|
+
# on the lookup table itself, preventing confusion between unrelated integer columns.
|
|
417
|
+
# Move "-- VALUES:" annotations from lookup tables to the FK columns that reference them.
# After this, LLMs see lookup values next to the FK column (e.g., category_id) instead of
# on the lookup table itself, preventing confusion between unrelated integer columns.
#
# Fix: the convention-based match previously used /\bcol\s+\w+(?!\s+FK)/, whose
# negative lookahead could be defeated by `\w+` backtracking (e.g. matching "IN"
# of "INT" so the lookahead saw "T FK" and passed), falsely annotating columns
# that explicitly FK to a different table. `\w+\b` forces the whole type token.
def relocate_lookup_annotations
  lines = @summary_text.split("\n")

  # Step 1: Extract VALUES annotations and their tables
  lookup_values = {} # table_name => values_string
  lines_without_values = []
  current_table = nil

  lines.each do |line|
    current_table = line.match(/^TABLE (\S+)/)[1] if line.start_with?("TABLE ")

    if line.strip.start_with?("-- VALUES:")
      lookup_values[current_table] = line.strip.sub("-- VALUES: ", "") if current_table
    else
      lines_without_values << line
    end
  end

  return if lookup_values.empty?

  # Step 2: Build convention-based table name patterns for matching
  convention_map = {} # "singular_id" => lookup_table
  lookup_values.each_key do |table|
    singular = if table.end_with?("ies")
      table[0..-4] + "y"
    elsif table.end_with?("ses")
      table[0..-3]
    elsif table.end_with?("s")
      table[0..-2]
    else
      table
    end
    convention_map["#{singular}_id"] = table
  end

  # Step 3: Find FK columns and inject FK LOOKUP annotations
  result = []
  lines_without_values.each do |line|
    result << line
    next unless line.start_with?("TABLE ")

    # Match explicit FK references: "column_name INT FK=>target_table.target_column"
    lookup_values.each do |lookup_table, values|
      line.scan(/(\w+)\s+\w+\s+FK=>#{Regexp.escape(lookup_table)}\.(\w+)/).each do |fk_col, _target_col|
        result << " -- FK LOOKUP: #{fk_col} values: #{values}"
      end
    end

    # Match convention-based references: "category_id INT" (no FK=> marker)
    convention_map.each do |fk_col_name, lookup_table|
      # Skip if already matched by explicit FK above
      next if line.include?("#{fk_col_name} ") && line.include?("FK=>#{lookup_table}")

      # \b after \w+ pins the full type token so (?!\s+FK) cannot be bypassed
      # by backtracking into the middle of the type name.
      if line.match?(/\b#{Regexp.escape(fk_col_name)}\s+\w+\b(?!\s+FK)/)
        result << " -- FK LOOKUP: #{fk_col_name} values: #{lookup_values[lookup_table]}"
      end
    end
  end

  @summary_text = result.join("\n")
end
|
|
480
|
+
|
|
481
|
+
# Apply soft delete annotations conditionally based on model introspection results.
|
|
482
|
+
# - Tables using a soft delete gem (paranoia, discard): always add SOFT DELETE annotation
|
|
483
|
+
# - Tables with enum soft delete but no gem: suppress SOFT DELETE (enum is the real mechanism)
|
|
484
|
+
# - Tables with neither: add SOFT DELETE annotation (assume column is used)
|
|
485
|
+
# Apply soft delete annotations conditionally based on model introspection results.
# - Tables using a soft delete gem (paranoia, discard): always add SOFT DELETE annotation
# - Tables with enum soft delete but no gem: suppress SOFT DELETE (enum is the real mechanism)
# - Tables with neither: add SOFT DELETE annotation (assume column is used)
#
# The original gem/neither branches pushed identical annotations, so they are
# collapsed into a single guard: suppress only when an enum mechanism exists
# without a gem-managed column.
def apply_soft_delete_annotations(soft_delete_tables:, enum_soft_delete_tables:)
  return if @deferred_soft_deletes.nil? || @deferred_soft_deletes.empty?

  new_annotations = {}
  @deferred_soft_deletes.each do |table, columns|
    # Enum is the real soft delete and no gem manages the column — suppress.
    next if enum_soft_delete_tables.include?(table) && !soft_delete_tables.include?(table)

    columns.each do |col|
      (new_annotations[table] ||= []) << " -- SOFT DELETE: filter #{col} IS NULL for active records"
    end
  end

  append_model_annotations(new_annotations) unless new_annotations.empty?
end
|
|
508
|
+
|
|
509
|
+
private
|
|
510
|
+
|
|
511
|
+
# -------------------------------------------------------------------
|
|
512
|
+
# Smart schema index builders (called at end of discover)
|
|
513
|
+
# -------------------------------------------------------------------
|
|
514
|
+
|
|
515
|
+
# Parse the lines array produced by discover() and split into per-table chunks.
|
|
516
|
+
# Each entry in @per_table_schemas is the full multi-line string for one table,
|
|
517
|
+
# including its TABLE header line and all annotation lines.
|
|
518
|
+
# Parse the lines array produced by discover() and split into per-table chunks.
# Each entry in @per_table_schemas is the full multi-line string for one table,
# including its TABLE header line and all annotation lines.
# Lines appearing before the first TABLE header are dropped.
def build_per_table_schemas(lines)
  @per_table_schemas = {}
  chunks = {}
  name = nil

  lines.each do |line|
    if line.start_with?("TABLE ")
      name = line.match(/^TABLE (\S+)/)[1]
      chunks[name] = [line]
    elsif name
      chunks[name] << line
    end
  end

  chunks.each { |table, table_lines| @per_table_schemas[table] = table_lines.join("\n") }
end
|
|
541
|
+
|
|
542
|
+
# Build an inverted index: column_name => [table_names].
|
|
543
|
+
# Skips sensitive columns so they can't be used as search hints.
|
|
544
|
+
# Build an inverted index: column_name => [table_names].
# Skips sensitive columns so they can't be used as search hints.
def build_table_index(columns_by_table)
  @table_index = {}
  columns_by_table.each do |table, cols|
    cols.each do |col|
      name = col["column_name"]
      unless self.class.sensitive?(name)
        (@table_index[name] ||= []) << table
      end
    end
  end
end
|
|
555
|
+
|
|
556
|
+
# Build a bidirectional FK graph: table_name => [{from_col:, to_table:, to_col:}]
|
|
557
|
+
# Includes both explicit FK constraints and convention-based _id columns.
|
|
558
|
+
# Build a bidirectional FK graph: table_name => [{from_col:, to_table:, to_col:}]
# Includes both explicit FK constraints and convention-based _id columns
# (first matching plural/singular table name wins; duplicates of explicit
# edges are skipped).
def build_fk_graph(fk_rows, columns_by_table, table_name_set)
  @fk_graph = {}

  add_edge = lambda do |table, from_col, to_table, to_col|
    (@fk_graph[table] ||= []) << { from_col: from_col, to_table: to_table, to_col: to_col }
  end

  # Explicit FK constraints (both directions)
  fk_rows.each do |row|
    add_edge.call(row["from_table"], row["from_column"], row["to_table"], row["to_column"])
    add_edge.call(row["to_table"], row["to_column"], row["from_table"], row["from_column"])
  end

  # Convention-based: *_id columns that resolve to a table name
  columns_by_table.each do |table, cols|
    cols.each do |col|
      name = col["column_name"]
      next unless name.end_with?("_id")

      stem = name[0..-4] # remove '_id'
      target = ["#{stem}s", "#{stem.sub(/y$/, 'ie')}s", "#{stem}es", stem]
               .find { |cand| table_name_set.include?(cand) }
      next unless target

      edges = (@fk_graph[table] ||= [])
      unless edges.any? { |e| e[:from_col] == name && e[:to_table] == target }
        edges << { from_col: name, to_table: target, to_col: "id" }
        (@fk_graph[target] ||= []) << { from_col: "id", to_table: table, to_col: name }
      end
    end
  end
end
|
|
602
|
+
|
|
603
|
+
# Match an array of search terms to table names.
|
|
604
|
+
# Tries (in order): exact table name, singular/plural variants, column name match,
|
|
605
|
+
# substring match on column names.
|
|
606
|
+
# Returns a Set of matching table names.
|
|
607
|
+
# Match an array of search terms to table names and score each match.
# Tries (in order): exact table name (+10), singular/plural variants (+8),
# Django-style "app_model" prefix matches (+7/+6/+4), substring (+2),
# and column-name matches via @table_index (+3).
# Returns a Hash of table_name => score with only positive scores.
#
# Cleanups vs. the original: removed an unused `table_set` local and the
# redundant `variants.include?(table) || variants.any? { |v| v == table }`
# (both sides were identical checks).
def score_tables(terms)
  scores = Hash.new(0)

  terms.each do |term|
    t = term.to_s.downcase.strip
    next if t.empty?

    @tables.each do |table|
      # Exact match (highest priority)
      if table == t
        scores[table] += 10
        next
      end

      # Singular/plural match
      variants = ["#{t}s", "#{t.sub(/y$/, 'ie')}s", "#{t}es", t.sub(/ies$/, 'y'), t.sub(/s$/, '')]
      if variants.include?(table)
        scores[table] += 8
        next
      end

      # Django/prefix match: "order" matches "order_order" (primary=7) vs "order_orderevent" (secondary=4)
      parts = table.split("_", 2)
      if parts.length >= 2
        app_name, model_name = parts
        if app_name == t
          scores[table] += (model_name == t || model_name == app_name) ? 7 : 4
          next
        end
        if model_name == t || model_name.sub(/s$/, "") == t
          scores[table] += 6
          next
        end
      end

      # General substring (low priority, min 3 chars)
      scores[table] += 2 if t.length >= 3 && table.include?(t)
    end

    # Column name match
    @table_index.each do |col_name, tables|
      if col_name == t || col_name == "#{t}_id"
        tables.each { |tbl| scores[tbl] += 3 }
      end
    end
  end

  scores.select { |_, v| v > 0 }
end
|
|
661
|
+
|
|
662
|
+
# Keep match_tables as alias for backward compatibility (used in tests)
|
|
663
|
+
# Backward-compatible alias used by older tests: returns just the names of
# the tables that score_tables matched, as a Set.
def match_tables(terms)
  matched = score_tables(terms)
  Set.new(matched.keys)
end
|
|
666
|
+
|
|
667
|
+
# Return the top N tables by FK edge count (most-connected = hub tables).
|
|
668
|
+
# Used as fallback when no search terms match any table.
|
|
669
|
+
# Return the top N tables by FK edge count (most-connected = hub tables).
# Used as fallback when no search terms match any table.
def hub_tables(limit)
  ranked = @tables.sort_by { |table| -(@fk_graph[table]&.length || 0) }
  Set.new(ranked.take(limit))
end
|
|
673
|
+
|
|
674
|
+
# BFS to find shortest FK join path between two tables (max depth 2).
|
|
675
|
+
# Returns:
|
|
676
|
+
# [] — tables are directly connected (no bridge needed)
|
|
677
|
+
# [t] — one bridge table t is needed
|
|
678
|
+
# nil — no path found within max depth
|
|
679
|
+
# BFS to find shortest FK join path between two tables (max depth 2).
# Returns:
#   []  — tables are directly connected (no bridge needed)
#   [t] — one bridge table t is needed
#   nil — no path found within max depth
def find_join_path(from, to)
  return [] if from == to

  # Depth 1: direct edge
  direct = (@fk_graph[from] || []).map { |edge| edge[:to_table] }
  return [] if direct.include?(to)

  # Depth 2: one bridge table
  bridge = direct.find do |mid|
    (@fk_graph[mid] || []).any? { |edge| edge[:to_table] == to }
  end
  bridge ? [bridge] : nil
end
|
|
694
|
+
|
|
695
|
+
# -------------------------------------------------------------------
# Legacy detection helpers (also called from discover loop above)
# -------------------------------------------------------------------

# Detect polymorphic associations: _type VARCHAR/TEXT + _id INT/BIGINT pairs.
# Returns array of prefix strings (e.g., ["commentable", "taggable"]).
def detect_polymorphic(columns)
  # Map each column name to its normalized type first, so _id lookups are O(1).
  types = {}
  columns.each { |col| types[col["column_name"]] = self.class.map_type(col["data_type"]) }

  types.each_with_object([]) do |(name, type), prefixes|
    next unless name.end_with?("_type") && %w[VARCHAR TEXT].include?(type)

    prefix = name.sub(/_type\z/, "")
    prefixes << prefix if %w[INT BIGINT].include?(types["#{prefix}_id"])
  end
end
|
|
719
|
+
|
|
720
|
+
# Detect soft-delete column. Returns the column name or nil.
def detect_soft_delete(columns)
  hit = columns.find { |col| SOFT_DELETE_COLUMNS.include?(col["column_name"]) }
  hit && hit["column_name"]
end
|
|
727
|
+
|
|
728
|
+
# Parse a check constraint definition for IN (...) or ANY(ARRAY[...]) enum patterns.
# Returns [column_name, [values]] or nil.
def parse_check_constraint(check_def)
  # Format 1: ((col)::text = ANY ((ARRAY['a'::varchar, 'b'::varchar])::text[]))
  # Format 2: (col IN ('a', 'b', 'c'))
  m = check_def.match(/\(\((\w+)\)::\w+\s*=\s*ANY\s*\(\(?ARRAY\[([^\]]+)\]/i) ||
      check_def.match(/\((\w+)\s+IN\s*\(([^)]+)\)/i)
  return nil unless m

  column, raw = m.captures
  return nil if raw.nil? || raw.empty?

  # Strip quotes and any trailing ::type cast from each value.
  values = []
  raw.split(",").each do |token|
    cleaned = token.strip.sub(/^'([^']*)'(?:::\w+.*)?$/, '\1').strip
    values << cleaned unless cleaned.empty?
  end

  values.empty? ? nil : [column, values]
end
|
|
747
|
+
|
|
748
|
+
# -------------------------------------------------------------------
# SQL query helpers (require ActiveRecord::Base.connection)
# -------------------------------------------------------------------

# List all base-table names in the public schema, sorted alphabetically.
def query_tables(conn)
  sql = <<~SQL
    SELECT table_name FROM information_schema.tables
    WHERE table_schema = 'public' AND table_type = 'BASE TABLE'
    ORDER BY table_name
  SQL
  conn.exec_query(sql).map { |row| row["table_name"] }
end
|
|
760
|
+
|
|
761
|
+
# Fetch every column of every public-schema table, in ordinal order.
# Returns an Array of Hash rows (table_name, column_name, data_type, ...).
def query_columns(conn)
  sql = <<~SQL
    SELECT table_name, column_name, data_type, udt_name, is_nullable, column_default
    FROM information_schema.columns
    WHERE table_schema = 'public'
    ORDER BY table_name, ordinal_position
  SQL
  conn.exec_query(sql).to_a
end
|
|
769
|
+
|
|
770
|
+
# Fetch (table_name, column_name) pairs for every PRIMARY KEY constraint
# in the public schema. Returns an Array of Hash rows.
def query_primary_keys(conn)
  sql = <<~SQL
    SELECT kcu.table_name, kcu.column_name
    FROM information_schema.table_constraints tc
    JOIN information_schema.key_column_usage kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.constraint_schema = kcu.constraint_schema
    WHERE tc.constraint_type = 'PRIMARY KEY' AND tc.table_schema = 'public'
  SQL
  conn.exec_query(sql).to_a
end
|
|
780
|
+
|
|
781
|
+
# Fetch every FOREIGN KEY edge in the public schema as
# (from_table, from_column, to_table, to_column) rows.
def query_foreign_keys(conn)
  sql = <<~SQL
    SELECT
      kcu.table_name AS from_table,
      kcu.column_name AS from_column,
      ccu.table_name AS to_table,
      ccu.column_name AS to_column
    FROM information_schema.table_constraints tc
    JOIN information_schema.key_column_usage kcu
      ON tc.constraint_name = kcu.constraint_name
      AND tc.constraint_schema = kcu.constraint_schema
    JOIN information_schema.constraint_column_usage ccu
      ON tc.constraint_name = ccu.constraint_name
      AND tc.constraint_schema = ccu.constraint_schema
    WHERE tc.constraint_type = 'FOREIGN KEY' AND tc.table_schema = 'public'
  SQL
  conn.exec_query(sql).to_a
end
|
|
798
|
+
|
|
799
|
+
# Fetch all native Postgres enum types and their labels, in declaration order.
# Returns an Array of Hash rows (enum_name, enum_value).
def query_enums(conn)
  sql = <<~SQL
    SELECT t.typname AS enum_name, e.enumlabel AS enum_value
    FROM pg_enum e
    JOIN pg_type t ON e.enumtypid = t.oid
    ORDER BY t.typname, e.enumsortorder
  SQL
  conn.exec_query(sql).to_a
end
|
|
807
|
+
|
|
808
|
+
# Fetch every CHECK constraint in the public schema as
# (table_name, check_def) rows, where check_def is the SQL definition text.
def query_check_constraints(conn)
  sql = <<~SQL
    SELECT conrelid::regclass AS table_name, pg_get_constraintdef(oid) AS check_def
    FROM pg_constraint
    WHERE contype = 'c' AND connamespace = 'public'::regnamespace
  SQL
  conn.exec_query(sql).to_a
end
|
|
815
|
+
|
|
816
|
+
# Get approximate row counts for all tables from pg_stat_user_tables.
# Returns Hash of table_name => integer count.
#
# NOTE: the heredoc is bound to a local first; interleaving it with the
# block header (as the original did) parses the same but reads badly.
def query_row_counts(conn)
  sql = <<~SQL
    SELECT relname, n_live_tup FROM pg_stat_user_tables WHERE schemaname = 'public'
  SQL
  conn.exec_query(sql).to_a.each_with_object({}) do |row, counts|
    counts[row["relname"]] = row["n_live_tup"].to_i
  end
end
|
|
825
|
+
|
|
826
|
+
# Query lookup values for small FK-target tables (fewer than 50 live rows).
# For each qualifying table, reads up to 50 rows and formats them as
# "id1=name1, id2=name2, ..." using the PK and the first VARCHAR/TEXT
# non-PK column as the display value.
#
# @param conn              [connection] ActiveRecord-style connection
# @param fk_target_tables  [Enumerable<String>] tables referenced by FKs
# @param columns_by_table  [Hash{String => Array<Hash>}] column rows per table
# @param pk_set            [#include?] set of "table.column" PK identifiers
# @return [Hash{String => String}] table_name => formatted value pairs
def discover_lookup_values(conn, fk_target_tables, columns_by_table, pk_set)
  result = {}
  return result if fk_target_tables.empty?

  # Reuse the shared helper instead of duplicating the pg_stat_user_tables
  # query inline (the original repeated the exact query from query_row_counts).
  row_counts = query_row_counts(conn)

  fk_target_tables.each do |table|
    count = row_counts[table]
    # Skip tables with unknown stats or too many rows to enumerate usefully.
    next if count.nil? || count >= 50

    columns = columns_by_table[table]
    next unless columns

    # Find PK column
    pk_col = columns.find { |c| pk_set.include?("#{table}.#{c['column_name']}") }
    next unless pk_col

    # Find first VARCHAR/TEXT non-PK column as display value
    display_col = columns.find do |c|
      next false if pk_set.include?("#{table}.#{c['column_name']}")

      mapped = self.class.map_type(c["data_type"])
      %w[VARCHAR TEXT].include?(mapped)
    end
    next unless display_col

    pk_name = pk_col["column_name"]
    display_name = display_col["column_name"]

    begin
      values_rows = conn.exec_query(
        "SELECT #{conn.quote_column_name(pk_name)}, #{conn.quote_column_name(display_name)} " \
        "FROM #{conn.quote_table_name(table)} " \
        "ORDER BY #{conn.quote_column_name(pk_name)} LIMIT 50"
      ).to_a

      if values_rows.any?
        pairs = values_rows.map { |r| "#{r[pk_name]}=#{r[display_name]}" }
        result[table] = pairs.join(", ")
      end
    rescue StandardError
      # Best-effort: skip tables that fail (e.g., permission issues)
    end
  end

  result
end
|
|
882
|
+
end
|
|
883
|
+
end
|
|
884
|
+
end
|