code_to_query 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +23 -0
- data/README.md +167 -0
- data/lib/code_to_query/compiler.rb +674 -0
- data/lib/code_to_query/configuration.rb +92 -0
- data/lib/code_to_query/context/builder.rb +1087 -0
- data/lib/code_to_query/context/pack.rb +36 -0
- data/lib/code_to_query/errors.rb +5 -0
- data/lib/code_to_query/guardrails/explain_gate.rb +229 -0
- data/lib/code_to_query/guardrails/sql_linter.rb +335 -0
- data/lib/code_to_query/llm_client.rb +46 -0
- data/lib/code_to_query/performance/cache.rb +250 -0
- data/lib/code_to_query/performance/optimizer.rb +396 -0
- data/lib/code_to_query/planner.rb +289 -0
- data/lib/code_to_query/policies/pundit_adapter.rb +71 -0
- data/lib/code_to_query/providers/base.rb +173 -0
- data/lib/code_to_query/providers/local.rb +84 -0
- data/lib/code_to_query/providers/openai.rb +581 -0
- data/lib/code_to_query/query.rb +385 -0
- data/lib/code_to_query/railtie.rb +16 -0
- data/lib/code_to_query/runner.rb +188 -0
- data/lib/code_to_query/validator.rb +203 -0
- data/lib/code_to_query/version.rb +6 -0
- data/lib/code_to_query.rb +90 -0
- data/tasks/code_to_query.rake +326 -0
- metadata +225 -0
@@ -0,0 +1,1087 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Context::Builder inspects the DB and app to produce a context pack file.
|
4
|
+
|
5
|
+
require 'fileutils'
require 'json'
|
6
|
+
|
7
|
+
begin
|
8
|
+
require 'active_record'
|
9
|
+
rescue LoadError
|
10
|
+
end
|
11
|
+
|
12
|
+
module CodeToQuery
|
13
|
+
module Context
|
14
|
+
# rubocop:disable Metrics/ClassLength
|
15
|
+
class Builder
|
16
|
+
# Convenience entry point: builds and persists a context pack using the
# library-wide configuration.
def self.bootstrap!
  new.bootstrap!
end
|
19
|
+
|
20
|
+
# @param config [CodeToQuery::Configuration] settings to use; defaults to
#   the global library configuration.
def initialize(config = CodeToQuery.config)
  @config = config
end
|
23
|
+
|
24
|
+
# Builds a full context pack (schema, models, glossary, policies, join
# hints) and writes it to disk.
# @return [Pack] the freshly built pack
def bootstrap!
  schema = extract_schema
  table_count = schema[:tables]&.length || schema['tables']&.length || 0
  @config.logger.info("[code_to_query] Schema data structure: #{schema.keys} with #{table_count} tables")

  models = scan_app
  # Optionally enrich the runtime scan with a static source scan.
  models = deep_merge_models(models, static_scan_app) if @config.prefer_static_scan

  # The app scan may have force-loaded models/connection; if the first
  # schema pass was empty, try once more.
  if table_count.to_i.zero?
    schema = extract_schema
    retried_count = schema[:tables]&.length || schema['tables']&.length || 0
    @config.logger.info("[code_to_query] Retried schema extraction after app scan: #{retried_count} tables")
  end

  pack = Pack.new(
    schema: schema,
    models: models,
    glossary: enrich_glossary_with_llm(generate_glossary(schema), schema, models),
    policies: collect_policies,
    hints: { performance: [], joins: extract_join_hints(schema) }
  )
  write_pack(pack)
  pack
end
|
54
|
+
|
55
|
+
# --- Components (stubs that won't crash) ---
|
56
|
+
|
57
|
+
# Introspects the live ActiveRecord connection and returns a schema hash:
#   { tables: [...], version: String, adapter: String }
# When no connection can be established the table list is empty and an
# :error key describes why.
def extract_schema
  return { tables: [], version: 'unknown', adapter: 'none' } unless defined?(ActiveRecord::Base)

  max_attempts = 3
  attempt = 0
  connection = nil

  # Rails defers connecting until first use, so poke the connection and
  # retry with a short backoff.
  until connection || attempt >= max_attempts
    attempt += 1
    begin
      ActiveRecord::Base.connection
      if ActiveRecord::Base.connected?
        connection = ActiveRecord::Base.connection
        @config.logger.info("[code_to_query] Connected to database with adapter: #{connection.adapter_name} (attempt #{attempt})")
      else
        @config.logger.info("[code_to_query] Database not connected on attempt #{attempt}")
        sleep(0.1) if attempt < max_attempts
      end
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Connection attempt #{attempt} failed: #{e.message}")
      sleep(0.1) if attempt < max_attempts
    end
  end

  unless connection
    error_msg = "Failed to establish database connection after #{max_attempts} attempts"
    @config.logger.warn("[code_to_query] #{error_msg}")
    return { tables: [], version: 'unknown', adapter: @config.adapter.to_s, error: error_msg }
  end

  tables = list_tables(connection)
  @config.logger.info("[code_to_query] Found #{tables.length} tables: #{tables.join(', ')}") if tables.any?
  @config.logger.info('[code_to_query] No tables found') if tables.empty?

  result = {
    tables: tables.map do |table_name|
      {
        name: table_name,
        columns: extract_table_columns(connection, table_name),
        indexes: extract_table_indexes(connection, table_name),
        foreign_keys: extract_foreign_keys(connection, table_name),
        constraints: extract_table_constraints(connection, table_name)
      }
    end,
    version: extract_schema_version(connection),
    adapter: connection.adapter_name.downcase
  }

  @config.logger.info("[code_to_query] Schema extraction completed with #{result[:tables].length} tables")
  result
rescue StandardError => e
  @config.logger.warn("[code_to_query] Schema extraction failed: #{e.message}")
  @config.logger.warn("[code_to_query] Backtrace: #{e.backtrace.first(5).join("\n")}")
  { tables: [], version: 'unknown', adapter: @config.adapter.to_s, error: e.message }
end
|
119
|
+
|
120
|
+
# Walks every loaded ActiveRecord model and captures per-model metadata
# (table, primary key, enums), plus associations, validations and scopes.
# @return [Hash] { models:, associations:, validations:, scopes: }
def scan_app
  models = {}
  associations = {}
  validations = {}
  scopes = {}

  if defined?(ActiveRecord::Base)
    # Eager-load the app so ActiveRecord::Base.descendants is populated.
    if defined?(Rails) && Rails.respond_to?(:application)
      begin
        Rails.application.eager_load!
      rescue StandardError => e
        @config.logger.warn("[code_to_query] Eager load failed: #{e.message}")
      end
    end

    ActiveRecord::Base.descendants.each do |model|
      next unless model.table_exists?

      name = model.name
      models[name] = {
        table_name: model.table_name,
        primary_key: model.primary_key,
        inheritance_column: model.inheritance_column,
        timestamps: has_timestamps?(model),
        soft_delete: has_soft_delete?(model),
        enums: extract_model_enums(model)
      }
      associations[name] = extract_model_associations(model)
      validations[name] = extract_model_validations(model)
      scopes[name] = extract_model_scopes(model)
    end
  end

  { models: models, associations: associations, validations: validations, scopes: scopes }
rescue StandardError => e
  @config.logger.warn("[code_to_query] App scanning failed: #{e.message}")
  { models: {}, associations: {}, validations: {}, scopes: {}, error: e.message }
end
|
166
|
+
|
167
|
+
# Builds a synonym glossary keyed by table name and "table.column",
# seeded from the schema and merged with the business glossary.
# @param existing_schema [Hash, nil] reuse an already-extracted schema to
#   avoid a second round of DB introspection
def generate_glossary(existing_schema = nil)
  schema = existing_schema || extract_schema
  glossary = {}

  Array(schema[:tables] || schema['tables'] || []).each do |table|
    table_name = table[:name] || table['name']

    table_synonyms = generate_table_synonyms(table_name)
    glossary[table_name] = table_synonyms if table_synonyms.any?

    Array(table[:columns] || table['columns']).each do |column|
      column_name = column[:name] || column['name']
      sql_type = column[:sql_type] || column['sql_type']
      column_synonyms = generate_column_synonyms(column_name, sql_type)
      glossary["#{table_name}.#{column_name}"] = column_synonyms if column_synonyms.any?
    end
  end

  # Business-specific terms win over auto-generated ones.
  glossary.merge!(load_business_glossary)
  glossary
rescue StandardError => e
  @config.logger.warn("[code_to_query] Glossary generation failed: #{e.message}")
  { error: e.message }
end
|
200
|
+
|
201
|
+
# Gathers authorization/policy metadata for the pack. Missing or broken
# adapters degrade to empty structures rather than raising.
def collect_policies
  policies = {
    enforced_predicates: {},
    column_access: {},
    row_level_security: {},
    audit_requirements: {}
  }

  if @config.policy_adapter.respond_to?(:call)
    begin
      # NOTE(review): a real caller would supply an actual user context here.
      adapter_result = @config.policy_adapter.call(nil)
      policies[:enforced_predicates] = adapter_result if adapter_result.is_a?(Hash)
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Policy collection failed: #{e.message}")
    end
  end

  # Merge in policies discovered from the app (Pundit, CanCan, etc.).
  policies.merge!(extract_authorization_policies)
  policies
rescue StandardError => e
  @config.logger.warn("[code_to_query] Policy collection failed: #{e.message}")
  { enforced_predicates: {}, error: e.message }
end
|
228
|
+
|
229
|
+
# Sanity-checks the pack on disk: the file must exist and contain a
# schema.tables array. Raises a descriptive error otherwise.
# @return [true]
def verify!
  path = @config.context_pack_path.to_s
  raise "Context pack not found at #{path}" unless File.exist?(path)

  parsed = JSON.parse(File.read(path))
  raise 'Context pack missing schema.tables' unless parsed.dig('schema', 'tables').is_a?(Array)

  true
end
|
238
|
+
|
239
|
+
private
|
240
|
+
|
241
|
+
# Lightweight static scan using regex heuristics to avoid runtime execution.
# If parser/rubocop-ast is available, we could replace these regexes with AST parsing.
def static_scan_app
  result = { models: {}, associations: {}, validations: {}, scopes: {} }

  # Turns "video: 0, image: 1" into { 'video' => 0, 'image' => 1 }.
  parse_int_pairs = lambda do |body|
    pairs = {}
    body.split(',').each do |pair|
      m = pair.strip.match(/([a-zA-Z0-9_]+):\s*(\d+)/)
      pairs[m[1]] = m[2].to_i if m
    end
    pairs
  end

  Array(@config.static_scan_dirs).compact.each do |dir|
    next unless Dir.exist?(dir)

    Dir.glob(File.join(dir, '**/*.rb')).each do |file|
      begin
        source = File.read(file)
      rescue StandardError
        next
      end

      model_name = infer_model_name_from_path(file)
      next unless model_name

      result[:models][model_name] ||= { table_name: nil, primary_key: 'id', inheritance_column: 'type', timestamps: true, soft_delete: false, enums: {} }
      result[:scopes][model_name] ||= {}

      # enum lines: enum attachment_type: { video: 0, image: 1 }
      source.scan(/\benum\s+([a-zA-Z0-9_]+):\s*\{([^}]+)\}/).each do |(col, body)|
        mapping = parse_int_pairs.call(body)
        next if mapping.empty?

        (result[:models][model_name][:enums][col] ||= {}).merge!(mapping)
      end

      # constant maps: ATTACHMENT_TYPES = { video: 0, image: 1 }
      source.scan(/([A-Z][A-Z0-9_]+)\s*=\s*\{([^}]+)\}/).each do |(const_name, body)|
        mapping = parse_int_pairs.call(body)
        next if mapping.empty?

        base = const_name.downcase.sub(/_types\z/, '').sub(/_type\z/, '').sub(/_statuses\z/, '').sub(/_status\z/, '').sub(/_kinds\z/, '').sub(/_kind\z/, '')
        col = ["#{base}_type", "#{base}_status"].find { |c| source.include?(c) }
        next unless col

        (result[:models][model_name][:enums][col] ||= {}).merge!(mapping)
      end

      # scopes: scope :with_videos, -> { where(attachment_type: 0) }
      source.scan(/scope\s+:([a-zA-Z0-9_]+),\s*->\s*\{([^}]+)\}/m).each do |(name, body)|
        condensed = body.strip.gsub(/\s+/, ' ')
        result[:scopes][model_name][name] = { type: 'scope', arity: -1, where: condensed[0..200] }
      end
    end
  end

  result
rescue StandardError => e
  @config.logger.warn("[code_to_query] Static scan failed: #{e.message}")
  { models: {}, associations: {}, validations: {}, scopes: {} }
end
|
306
|
+
|
307
|
+
# Merges a secondary models/associations/validations/scopes hash into a
# deep copy of the primary one (Marshal round-trip keeps `primary`
# untouched). Nested hashes merge one level deep; on any error the
# original `primary` is returned as-is.
def deep_merge_models(primary, extra)
  merged = Marshal.load(Marshal.dump(primary))
  %i[models associations validations scopes].each do |section|
    merged[section] ||= {}
    (extra[section] || {}).each do |name, incoming|
      current = merged[section][name]
      merged[section][name] =
        if incoming.is_a?(Hash) && current.is_a?(Hash)
          current.merge(incoming) { |_key, a, b| a.is_a?(Hash) && b.is_a?(Hash) ? a.merge(b) : b }
        else
          incoming
        end
    end
  end
  merged
rescue StandardError
  primary
end
|
323
|
+
|
324
|
+
# Derives a model class name from a file path by simple camelization:
#   app/models/user_profile.rb -> "UserProfile"
# NOTE: namespaced models (app/models/admin/user.rb) resolve to the leaf
# constant only ("User"), never "Admin::User".
# @param path [String] path to a .rb file
# @return [String, nil] nil when the basename is empty
def infer_model_name_from_path(path)
  base = File.basename(path, '.rb')
  return nil if base.empty?

  # File.basename never contains a path separator, so the original
  # `split('/')` step was dead code — camelize the basename directly.
  base.split('_').map(&:capitalize).join
end
|
331
|
+
|
332
|
+
# Infers join hints from *_id column naming: every foreign-key-looking
# column yields { from:, column:, to: }, where :to is the naive plural of
# the referenced name ("user_id" -> "users"; irregular nouns may be wrong).
def extract_join_hints(schema)
  hints = []
  Array(schema[:tables] || schema['tables'] || []).each do |table|
    table_name = table[:name] || table['name']
    Array(table[:columns] || table['columns']).each do |column|
      column_name = column[:name] || column['name']
      next unless column_name.end_with?('_id')

      hints << { from: table_name, column: column_name, to: column_name.sub(/_id\z/, 's') }
    end
  end
  hints
rescue StandardError
  []
end
|
350
|
+
|
351
|
+
# Resolves the database's table names using a chain of fallbacks:
#   1. Rails connection helpers (data_sources / tables),
#   2. adapter-specific catalog queries (pg_tables / information_schema,
#      SHOW TABLES, sqlite_master),
#   3. a static parse of db/schema.rb.
# Returns [] when every strategy comes up empty.
def list_tables(connection)
  adapter_name = connection.adapter_name.downcase
  @config.logger.info("[code_to_query] Detecting tables for adapter: #{adapter_name}")

  names =
    begin
      if connection.respond_to?(:data_sources)
        found = Array(connection.data_sources)
        @config.logger.info("[code_to_query] Using data_sources method, found #{found.length} tables")
        found
      else
        found = Array(connection.tables)
        @config.logger.info("[code_to_query] Using tables method, found #{found.length} tables")
        found
      end
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Rails helpers failed: #{e.message}")
      []
    end
  return names.uniq if names.any?

  case adapter_name
  when 'postgresql'
    @config.logger.info('[code_to_query] Trying PostgreSQL specific queries')
    begin
      search_path = connection.respond_to?(:schema_search_path) ? connection.schema_search_path.to_s : 'public'
      schemas = search_path.split(',').map { |s| s.strip.gsub('"', '') }
      @config.logger.info("[code_to_query] Using schemas: #{schemas.join(', ')}")
      rows = connection.execute(<<~SQL)
        SELECT schemaname, tablename
        FROM pg_tables
        WHERE schemaname = ANY (ARRAY[#{schemas.map { |s| connection.quote(s) }.join(', ')}])
      SQL
      pg_names = rows.map { |r| r['tablename'] || r[:tablename] }.compact.uniq
      @config.logger.info("[code_to_query] Found #{pg_names.length} tables via pg_tables: #{pg_names.join(', ')}")
      return pg_names if pg_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] pg_tables query failed: #{e.message}")
    end

    begin
      rows = connection.execute(<<~SQL)
        SELECT table_schema, table_name
        FROM information_schema.tables
        WHERE table_type = 'BASE TABLE'
        AND table_schema NOT IN ('pg_catalog','information_schema')
      SQL
      info_names = rows.map { |r| r['table_name'] || r[:table_name] }.compact.uniq
      @config.logger.info("[code_to_query] Found #{info_names.length} tables via information_schema: #{info_names.join(', ')}")
      return info_names if info_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] information_schema query failed: #{e.message}")
    end
  when 'mysql2', 'mysql'
    @config.logger.info('[code_to_query] Trying MySQL specific queries')
    begin
      rows = connection.execute("SHOW FULL TABLES WHERE Table_type = 'BASE TABLE'")
      mysql_names = rows.map { |r| r.values.first }.compact.uniq
      @config.logger.info("[code_to_query] Found #{mysql_names.length} tables via SHOW TABLES: #{mysql_names.join(', ')}")
      return mysql_names if mysql_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] SHOW TABLES query failed: #{e.message}")
    end
  when 'sqlite3', 'sqlite'
    @config.logger.info('[code_to_query] Trying SQLite specific queries')
    begin
      rows = connection.execute("SELECT name FROM sqlite_master WHERE type='table'")
      sqlite_names = rows.map { |r| r['name'] || r[:name] }.compact.uniq
      @config.logger.info("[code_to_query] Found #{sqlite_names.length} tables via sqlite_master: #{sqlite_names.join(', ')}")
      return sqlite_names if sqlite_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] sqlite_master query failed: #{e.message}")
    end
  else
    @config.logger.info("[code_to_query] Unknown adapter '#{adapter_name}', trying generic methods")
  end

  @config.logger.info('[code_to_query] Trying to parse db/schema.rb as last resort')
  parsed = parse_schema_rb
  if parsed.any?
    @config.logger.info("[code_to_query] Found #{parsed.length} tables in schema.rb: #{parsed.join(', ')}")
    return parsed
  end

  @config.logger.info('[code_to_query] No tables found through any method')
  []
end
|
442
|
+
|
443
|
+
# Extracts table names from db/schema.rb without executing it.
# @return [Array<String>] empty outside of a Rails app or on failure
def parse_schema_rb
  return [] unless defined?(Rails)

  schema_path = Rails.root.join('db', 'schema.rb')
  unless File.exist?(schema_path)
    @config.logger.info("[code_to_query] schema.rb not found at #{schema_path}")
    return []
  end

  begin
    # Match lines like: create_table "table_name", force: :cascade do |t|
    tables = File.read(schema_path).scan(/create_table\s+"([^"]+)"/).flatten.uniq
    @config.logger.info("[code_to_query] Parsed #{tables.length} table names from schema.rb: #{tables.join(', ')}")
    tables
  rescue StandardError => e
    @config.logger.warn("[code_to_query] Failed to parse schema.rb: #{e.message}")
    []
  end
end
|
463
|
+
|
464
|
+
# Describes every column of a table, flagging the primary key and
# auto-increment columns; returns [] if introspection fails.
def extract_table_columns(connection, table_name)
  pk_name = connection.primary_key(table_name)

  connection.columns(table_name).map do |col|
    primary = (col.name == pk_name)
    {
      name: col.name,
      sql_type: col.sql_type,
      type: col.type,
      null: col.null,
      default: col.default,
      primary: primary,
      auto_increment: determine_auto_increment(col, connection: connection, is_primary: primary),
      comment: extract_column_comment(connection, table_name, col.name)
    }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract columns for #{table_name}: #{e.message}")
  []
end
|
485
|
+
|
486
|
+
# Best-effort detection of auto-increment semantics across adapters and
# Rails versions. Prefers the column object's own API, then falls back to
# sql_type / default-value heuristics. Returns false on any error.
def determine_auto_increment(column, connection: nil, is_primary: false)
  return column.auto_increment? if column.respond_to?(:auto_increment?)
  return column.serial? if column.respond_to?(:serial?)
  return column.identity? if column.respond_to?(:identity?)

  raw_type = column.sql_type
  return false if raw_type.nil?

  lowered = raw_type.downcase
  case lowered
  when /serial/, /identity/
    # PostgreSQL serial, bigserial, identity columns
    true
  when /int.*auto_increment/, /auto_increment/
    # MySQL auto_increment columns
    true
  else
    # PostgreSQL sequence-backed default value
    return true if column.default.to_s =~ /nextval\(/i

    # SQLite: an INTEGER PRIMARY KEY is implicitly auto-increment
    return true if connection && @config.adapter == :sqlite && is_primary && lowered == 'integer'

    false
  end
rescue StandardError
  false
end
|
518
|
+
|
519
|
+
# Lists a table's indexes with name/columns/uniqueness/partial metadata.
# `try`/`present?` are ActiveSupport helpers, available under Rails.
def extract_table_indexes(connection, table_name)
  connection.indexes(table_name).map do |index|
    {
      name: index.name,
      columns: index.columns,
      unique: index.unique,
      partial: index.try(:where).present?,
      type: index.try(:type) || 'btree'
    }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract indexes for #{table_name}: #{e.message}")
  []
end
|
533
|
+
|
534
|
+
# Lists a table's foreign keys; adapters that don't support FK
# introspection yield [].
def extract_foreign_keys(connection, table_name)
  return [] unless connection.respond_to?(:foreign_keys)

  connection.foreign_keys(table_name).map do |fk|
    {
      name: fk.name,
      column: fk.column,
      to_table: fk.to_table,
      primary_key: fk.primary_key,
      on_delete: fk.on_delete,
      on_update: fk.on_update
    }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract foreign keys for #{table_name}: #{e.message}")
  []
end
|
553
|
+
|
554
|
+
# Collects CHECK constraints for a table (PostgreSQL only; other adapters
# yield []). The table name is quoted via the connection instead of being
# interpolated raw into the SQL, closing an injection hole.
# @return [Array<Hash>] [{ name:, type: 'check', definition: }]
def extract_table_constraints(connection, table_name)
  constraints = []

  if @config.adapter == :postgres
    begin
      rows = connection.execute(<<~SQL)
        SELECT conname, pg_get_constraintdef(oid) as definition
        FROM pg_constraint
        WHERE conrelid = #{connection.quote(table_name)}::regclass
        AND contype = 'c'
      SQL

      rows.each do |row|
        constraints << {
          name: row['conname'],
          type: 'check',
          definition: row['definition']
        }
      end
    rescue StandardError
      # Best effort: ignore when check constraints can't be read.
    end
  end

  constraints
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract constraints for #{table_name}: #{e.message}")
  []
end
|
584
|
+
|
585
|
+
# Reads a column's comment from the database catalog (PostgreSQL and
# MySQL only; other adapters return nil). Table and column names are
# quoted via the connection instead of raw string interpolation, closing
# an injection hole.
# @return [String, nil]
def extract_column_comment(connection, table_name, column_name)
  case @config.adapter
  when :postgres
    result = connection.execute(<<~SQL)
      SELECT col_description(pgc.oid, pga.attnum) as comment
      FROM pg_class pgc
      JOIN pg_attribute pga ON pgc.oid = pga.attrelid
      WHERE pgc.relname = #{connection.quote(table_name)}
      AND pga.attname = #{connection.quote(column_name)}
    SQL
    result.first&.fetch('comment', nil)
  when :mysql
    result = connection.execute(<<~SQL)
      SELECT COLUMN_COMMENT as comment
      FROM INFORMATION_SCHEMA.COLUMNS
      WHERE TABLE_SCHEMA = DATABASE()
      AND TABLE_NAME = #{connection.quote(table_name)}
      AND COLUMN_NAME = #{connection.quote(column_name)}
    SQL
    result.first&.fetch('comment', nil)
  end
rescue StandardError
  nil
end
|
609
|
+
|
610
|
+
# Returns the database server version string for the configured adapter,
# or 'unknown' when it cannot be determined.
def extract_schema_version(connection)
  query, key =
    case @config.adapter
    when :postgres then ['SELECT version()', 'version']
    when :mysql then ['SELECT version()', 'version()']
    when :sqlite then ['SELECT sqlite_version()', 'sqlite_version()']
    end
  return 'unknown' unless query

  connection.execute(query).first[key]
rescue StandardError
  'unknown'
end
|
624
|
+
|
625
|
+
# True when the model has both Rails timestamp columns.
def has_timestamps?(model)
  %w[created_at updated_at].all? { |col| model.column_names.include?(col) }
end
|
628
|
+
|
629
|
+
# True when the model looks soft-deletable: either a deleted_at column or
# a paranoia-style `paranoid?` class flag.
def has_soft_delete?(model)
  return true if model.column_names.include?('deleted_at')

  model.respond_to?(:paranoid?) && model.paranoid?
end
|
633
|
+
|
634
|
+
# Captures reflection metadata for each association whose target class
# actually resolves; associations with missing or broken classes are
# logged and skipped rather than aborting the scan.
def extract_model_associations(model)
  associations = {}

  model.reflect_on_all_associations.each do |assoc|
    # Verify the target class exists before touching the reflection.
    begin
      assoc.class_name.constantize
    rescue NameError
      @config.logger.info("[code_to_query] Skipping association #{assoc.name} for #{model.name}: class #{assoc.class_name} not found")
      next
    rescue StandardError => e
      @config.logger.info("[code_to_query] Skipping association #{assoc.name} for #{model.name}: #{e.message}")
      next
    end

    # Reflection accessors themselves can raise on malformed associations.
    begin
      associations[assoc.name] = {
        type: assoc.macro,
        class_name: assoc.class_name,
        foreign_key: assoc.foreign_key,
        primary_key: assoc.association_primary_key,
        through: assoc.options[:through],
        dependent: assoc.options[:dependent],
        polymorphic: assoc.options[:polymorphic],
        as: assoc.options[:as]
      }
    rescue StandardError => e
      @config.logger.info("[code_to_query] Skipping problematic association #{assoc.name} for #{model.name}: #{e.message}")
      next
    end
  end

  associations
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract associations for #{model.name}: #{e.message}")
  {}
end
|
673
|
+
|
674
|
+
# Groups a model's validators by attribute:
#   { attribute => [{ type: 'SomeValidator', options: {...} }, ...] }
def extract_model_validations(model)
  validations = {}

  model.validators.each do |validator|
    validator.attributes.each do |attr|
      (validations[attr] ||= []) << { type: validator.class.name, options: validator.options }
    end
  end

  validations
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract validations for #{model.name}: #{e.message}")
  {}
end
|
692
|
+
|
693
|
+
# Discovers enum-like mappings for a model from three sources:
#   1. native Rails `enum` declarations (defined_enums),
#   2. constant hashes such as ATTACHMENT_TYPES = { 'video' => 0 },
#      matched to *_type / *_status columns by name,
#   3. generated enum helper methods (e.g. Model.attachment_types).
# @return [Hash] column name => { label => Integer }
def extract_model_enums(model)
  enums = {}
  begin
    # 1) Native Rails enums
    model.defined_enums.each { |name, mapping| enums[name] = mapping } if model.respond_to?(:defined_enums)

    column_names =
      begin
        model.column_names
      rescue StandardError
        []
      end

    # 2) Constant hashes whose keys are labels and values are integers
    model.constants(false).each do |const_name|
      value = model.const_get(const_name)
      next unless value.is_a?(Hash)
      next unless value.keys.all? { |k| k.is_a?(String) || k.is_a?(Symbol) }
      next unless value.values.all? { |v| v.is_a?(Integer) || (v.is_a?(String) && v.match?(/^\d+$/)) }

      # Heuristic: *_TYPES / *_STATUSES / *_KINDS constants map to
      # *_type / *_status columns.
      base = const_name.to_s.downcase
      base = base.sub(/_types\z/, '').sub(/_type\z/, '').sub(/_statuses\z/, '').sub(/_status\z/, '').sub(/_kinds\z/, '').sub(/_kind\z/, '')

      candidates = []
      candidates << (base.end_with?('_') ? "#{base}type" : "#{base}_type")
      candidates << (base.end_with?('_') ? "#{base}status" : "#{base}_status")

      column = candidates.find { |c| column_names.include?(c) }
      next unless column

      mapping = {}
      value.each { |k, v| mapping[k.to_s] = Integer(v) }

      # Augment any Rails enum already recorded for the same column.
      enums[column] = (enums[column] || {}).merge(mapping)
    rescue StandardError
      next
    end

    # 3) Generated enum helper methods (e.g. attachment_types)
    begin
      column_names.each do |col|
        plural = ''.respond_to?(:pluralize) ? col.to_s.pluralize : "#{col}s"
        next unless model.respond_to?(plural)

        mapping = model.public_send(plural)
        next unless mapping.is_a?(Hash) && mapping.keys.all? { |k| k.is_a?(String) || k.is_a?(Symbol) }
        next unless mapping.values.all? { |v| v.is_a?(Integer) || (v.is_a?(String) && v.match?(/^\d+$/)) }

        normalized = {}
        mapping.each { |k, v| normalized[k.to_s] = Integer(v) }
        enums[col.to_s] = (enums[col.to_s] || {}).merge(normalized)
      end
    rescue StandardError
      # ignore
    end
  rescue StandardError => e
    @config.logger.warn("[code_to_query] Failed to extract enums for #{model.name}: #{e.message}")
  end
  enums
end
|
767
|
+
|
768
|
+
# Discovers the named scopes a model exposes and returns a hash of
# scope-name => metadata ({type:, arity:, sample_sql:, where:}).
# Discovery is best-effort: it tries explicit scope registries first,
# then falls back to probing singleton methods; any error degrades to
# an empty hash rather than aborting the context build.
def extract_model_scopes(model)
  scopes = {}

  # Prefer explicit registries if available
  # (different ActiveRecord versions expose different registry APIs,
  # hence the if/elsif chain — only the first available one is used).
  registry_names = []
  if model.respond_to?(:scope_registry) && model.scope_registry.respond_to?(:each)
    model.scope_registry.each do |name, body|
      registry_names << name.to_s
      # body may be a Proc or an arbitrary object; fall back to arity 0
      scopes[name.to_s] = { type: 'scope', arity: (body.respond_to?(:arity) ? body.arity : 0) }
    end
  elsif model.respond_to?(:scopes)
    model.scopes.each do |name, scope_proc|
      registry_names << name.to_s
      scopes[name.to_s] = { type: 'scope', arity: scope_proc&.arity || 0 }
    end
  elsif model.respond_to?(:defined_scopes)
    model.defined_scopes.each_key do |name|
      registry_names << name.to_s
      scopes[name.to_s] = { type: 'scope', arity: 0 }
    end
  end

  # Fallback: probe singleton methods that look like scopes and return a Relation
  begin
    # Only snake_case-looking names; singleton_methods(false) excludes inherited ones.
    candidate_methods = model.singleton_methods(false).select { |m| m.to_s.match?(/\A[a-z_][a-zA-Z0-9_]*\z/) }
    # Core query/CRUD methods that must never be invoked as candidate scopes.
    disallow = %w[new create update delete destroy find where order limit select joins includes preload eager_load pluck first last all none not or count sum average minimum maximum]
    candidate_methods.each do |m|
      next if disallow.include?(m.to_s)
      next if registry_names.include?(m.to_s)

      meth = model.method(m)
      ar = meth.arity
      # Only try zero-arg or optional-arg methods
      next unless ar.zero? || ar.negative?

      rel = nil
      begin
        # NOTE: this actually calls the method — side effects are possible;
        # errors are swallowed so a misbehaving method just gets skipped.
        rel = meth.call
      rescue ArgumentError
        next
      rescue StandardError
        next
      end
      # Accept only methods that actually returned an ActiveRecord::Relation.
      # `||=` keeps registry-derived entries authoritative over probed ones.
      if defined?(ActiveRecord::Relation) && rel.is_a?(ActiveRecord::Relation)
        scopes[m.to_s] ||= { type: 'scope', arity: ar }
      end
    end
  rescue StandardError
    # ignore
  end

  # Enhance scope entries with sample SQL and a compact where summary
  scopes.each_key do |name|
    rel = model.public_send(name)
    if defined?(ActiveRecord::Relation) && rel.is_a?(ActiveRecord::Relation)
      # LIMIT 1 keeps the sample SQL cheap to generate and representative.
      sql = rel.limit(1).to_sql
      scopes[name][:sample_sql] = truncate(sql, 500)
      where = extract_where_clause(sql)
      scopes[name][:where] = truncate(where, 200) if where
    end
  rescue StandardError
    # skip scopes that error out
  end

  scopes
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract scopes for #{model.name}: #{e.message}")
  {}
end
|
837
|
+
|
838
|
+
# Extracts the WHERE predicate from a SQL string for compact display in the
# context pack. Returns the predicate with whitespace collapsed to single
# spaces, or nil when the input is not a String or has no WHERE clause.
# Note: the regex's `.` does not cross newlines, so only single-line
# clauses (as produced by ActiveRecord's to_sql) are captured.
def extract_where_clause(sql)
  return nil unless sql.is_a?(String)

  # Capture everything between WHERE and the next ORDER BY / LIMIT /
  # GROUP BY keyword (or end of string), non-greedily.
  match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER\s+BY|\s+LIMIT|\s+GROUP\s+BY|\z)/i)
  return nil unless match

  # Collapse runs of whitespace so the clause reads as one line.
  # (The original also called gsub('"', '"') here — a no-op that replaced a
  # double quote with itself; removed as dead code.)
  match[1].gsub(/\s+/, ' ').strip
end
|
847
|
+
|
848
|
+
# Shortens +str+ to at most +max+ characters, replacing the tail with "...".
# Non-string inputs (e.g. nil) are returned untouched.
def truncate(str, max)
  return str unless str.is_a?(String) && str.length > max

  # Reserve three characters for the ellipsis.
  head = str[0, max - 3]
  "#{head}..."
end
|
854
|
+
|
855
|
+
# Produces alternative names a user might use for +table_name+.
# Currently this is the naive singular/plural counterpart plus an
# (empty) business-synonym map that the LLM enrichment step extends.
def generate_table_synonyms(table_name)
  # Singular/plural variations
  counterpart =
    if table_name.end_with?('s')
      table_name.chomp('s')
    else
      "#{table_name}s"
    end

  synonyms = [counterpart]

  # Basic common business synonyms - LLM will extend these based on actual schema
  business_synonyms = {}
  synonyms.concat(business_synonyms.fetch(table_name, []))

  synonyms.uniq
end
|
871
|
+
|
872
|
+
# Maps a physical column name (plus its SQL type) to natural-language
# synonyms a user might employ when phrasing a query. Returns a flat,
# de-duplicated array of strings.
def generate_column_synonyms(column_name, sql_type)
  synonyms = []

  # Basic common column synonyms - LLM will extend these based on actual schema
  column_synonyms = {
    'created_at' => %w[created_on date_created creation_date],
    'updated_at' => %w[updated_on date_updated modification_date],
    'email' => %w[email_address e_mail],
    'phone' => %w[phone_number telephone],
    'amount' => %w[total price cost value],
    'quantity' => %w[qty amount count],
    'description' => %w[desc summary details]
  }

  synonyms.concat(column_synonyms[column_name] || [])

  # Type-based synonyms: money-like numeric columns are commonly referred to
  # as amount/price/cost even when the column is named something else.
  if sql_type.to_s.match?(/money|decimal.*2|numeric.*2/) && !column_name.match?(/amount|price|cost/)
    # Bug fix: the original used `push`, which appended the whole %w[] array
    # as ONE nested element; `concat` flattens the entries into the list.
    synonyms.concat(%w[amount price cost])
  end

  synonyms.uniq
end
|
895
|
+
|
896
|
+
# Loads an optional business glossary JSON stored alongside the context pack.
# Any failure (missing file, unreadable file, invalid JSON, unset config)
# degrades to an empty glossary.
def load_business_glossary
  path = File.join(File.dirname(@config.context_pack_path), 'business_glossary.json')
  return {} unless File.exist?(path)

  JSON.parse(File.read(path))
rescue StandardError
  # The glossary is strictly optional — never fail the context build over it.
  {}
end
|
907
|
+
|
908
|
+
# Asks the configured LLM to extend +base_glossary+ with schema-derived
# synonyms and relationship hints. Returns the merged glossary, or
# +base_glossary+ unchanged when LLM enrichment is disabled, no API key is
# configured, the response is not valid JSON, or any error occurs.
def enrich_glossary_with_llm(base_glossary, schema_data, models_data)
  return base_glossary unless @config.auto_glossary_with_llm
  return base_glossary unless @config.openai_api_key

  # Build a compact prompt for the LLM to suggest synonyms and intent hints
  begin
    # NOTE(review): net/http and uri appear unused here — the HTTP call is
    # delegated to LLMClient below; confirm before removing these requires.
    require 'net/http'
    require 'uri'
    require 'json'

    # Accept both symbol- and string-keyed schema data.
    tables = Array(schema_data[:tables] || schema_data['tables'])
    # One-line summary per table (capped at 20 tables, 12 columns each)
    # to keep the prompt compact.
    table_summaries = tables.take(20).map do |t|
      name = t[:name] || t['name']
      cols = Array(t[:columns] || t['columns']).map { |c| c[:name] || c['name'] }
      # Treat *_id columns as foreign-key hints.
      fks = cols.select { |c| c.to_s.end_with?('_id') }
      "#{name}: cols(#{cols.take(12).join(', ')}), fks(#{fks.join(', ')})"
    end

    scopes = models_data[:scopes] || models_data['scopes'] || {}
    # "Model.scope: WHERE-summary" lines for scopes that captured a where
    # clause; capped at 40 entries.
    scope_lines = scopes.flat_map do |model_name, scope_hash|
      next [] unless scope_hash.is_a?(Hash)

      scope_hash.map do |scope_name, meta|
        where = meta[:where] || meta['where']
        "#{model_name}.#{scope_name}: #{where}" if where
      end.compact
    end.take(40)

    system_prompt = <<~P
      You are analyzing a database schema to understand the business domain and create a comprehensive glossary.

      Your task: Infer the business domain from table names, column names, and relationships, then create mappings that help users query with natural language.

      Analyze the schema to understand:
      1. What business domain this represents (e.g. e-commerce, education, CRM, content management, etc.)
      2. What real-world entities and relationships exist
      3. How a user would naturally refer to these entities and relationships

      Create mappings for:
      - Business entities: How users refer to main concepts
      - Relationships: How users describe connections between entities (e.g., "answered by", "created by", "belongs to")
      - Actions/States: How users describe actions or states (e.g., "completed", "failed", "pending")
      - IDs and Foreign Keys: How users refer to specific entities
      - Domain-specific terms: Technical terms users might use differently than column names

      For relationship queries, create special relationship mappings:
      - Key format: "relationship_[action]_[entity]"#{' '}
      - Value: Array describing the EXISTS pattern needed

      Rules:
      - Only use tables and columns that exist in the schema
      - Infer domain from naming patterns, foreign keys, and table relationships
      - Don't assume any specific business domain
      - Create practical mappings a real user would need
      - Output JSON: {"term": ["synonym1", "synonym2"], "table.column": ["user_term1"], "relationship_action_entity": ["EXISTS pattern hint"]}

      Keep under #{@config.max_glossary_suggestions} entries total.
    P

    # Analyze foreign key relationships to understand business logic
    fk_relationships = tables.flat_map do |t|
      table_name = t[:name] || t['name']
      fks = Array(t[:foreign_keys] || t['foreign_keys'])
      fks.map do |fk|
        "#{table_name}.#{fk[:column] || fk['column']} -> #{fk[:to_table] || fk['to_table']}"
      end
    end

    user_prompt = <<~U
      DATABASE SCHEMA ANALYSIS:

      Tables and Columns:
      #{table_summaries.join("\n")}

      Foreign Key Relationships (showing business connections):
      #{fk_relationships.join("\n")}

      Model Scopes (showing common business queries):
      #{scope_lines.join("\n")}

      Current glossary has: #{base_glossary.keys.take(20).join(', ')}

      TASK: Analyze this schema and infer the business domain. Create a glossary that maps how real users would naturally refer to these entities and relationships. Focus especially on understanding what the foreign key relationships tell us about the business logic.
    U

    messages = [
      { role: 'system', content: system_prompt },
      { role: 'user', content: user_prompt }
    ]

    # Use an injected client when configured (useful for tests/alternate providers).
    client = @config.llm_client || CodeToQuery::LLMClient.new(@config)
    text = client.chat(messages: messages)
    # A non-JSON response is treated as "no suggestions" rather than an error.
    suggestions = begin
      JSON.parse(text)
    rescue StandardError
      {}
    end
    if suggestions.is_a?(Hash)
      # LLM suggestions win over base entries on key collisions.
      return base_glossary.merge(suggestions)
    end
  rescue StandardError => e
    @config.logger.warn("[code_to_query] LLM glossary enrichment failed: #{e.message}")
  end

  base_glossary
end
|
1014
|
+
|
1015
|
+
# Collects authorization policy metadata from whichever framework is loaded.
# Returns a hash keyed by framework (:pundit / :cancan); empty when neither
# framework is present or extraction fails.
def extract_authorization_policies
  policies = {}

  # Check for Pundit policies
  policies[:pundit] = extract_pundit_policies if defined?(Pundit)

  # Check for CanCanCan abilities
  policies[:cancan] = extract_cancan_policies if defined?(CanCan)

  policies
rescue StandardError => e
  # Consistency fix: log via the configured logger like the other extractors
  # (the original used bare Kernel#warn here).
  @config.logger.warn("[code_to_query] Failed to extract authorization policies: #{e.message}")
  {}
end
|
1029
|
+
|
1030
|
+
# Placeholder: returns an empty hash. Called by
# extract_authorization_policies only when Pundit is loaded.
def extract_pundit_policies
  # This would extract Pundit policy information
  # Implementation depends on your specific setup
  {}
end
|
1035
|
+
|
1036
|
+
# Placeholder: returns an empty hash. Called by
# extract_authorization_policies only when CanCan is loaded.
def extract_cancan_policies
  # This would extract CanCanCan ability information
  # Implementation depends on your specific setup
  {}
end
|
1041
|
+
|
1042
|
+
# Persists the context pack: the JSON file at the configured path plus a
# human-readable README.md in the same directory. Creates the directory
# if it does not yet exist.
def write_pack(pack)
  pack_path = @config.context_pack_path
  pack_dir = File.dirname(pack_path)
  FileUtils.mkdir_p(pack_dir)

  # Machine-readable pack consumed by the planner/compiler.
  File.write(pack_path, pack.to_json)

  # Also write a human-readable version
  File.write(File.join(pack_dir, 'README.md'), generate_context_readme(pack))
end
|
1052
|
+
|
1053
|
+
# Renders a human-readable Markdown summary of the context pack: tables,
# model-to-table mappings, and glossary terms.
def generate_context_readme(pack)
  # Pre-render each section's bullet list so the heredoc stays readable.
  table_lines = pack.schema[:tables].map do |table|
    "- **#{table[:name]}** (#{table[:columns].length} columns)"
  end.join("\n")

  model_lines = pack.models[:models].map do |name, info|
    "- **#{name}** → `#{info[:table_name]}`"
  end.join("\n")

  glossary_lines = pack.glossary.map do |term, synonyms|
    "- **#{term}**: #{Array(synonyms).join(', ')}"
  end.join("\n")

  <<~README
    # CodeToQuery Context Pack

    Generated on: #{Time.now}
    Database Adapter: #{pack.schema[:adapter]}
    Schema Version: #{pack.schema[:version]}

    ## Tables (#{pack.schema[:tables].length})

    #{table_lines}

    ## Models (#{pack.models[:models].length})

    #{model_lines}

    ## Glossary Terms (#{pack.glossary.length})

    #{glossary_lines}

    This context pack is used by CodeToQuery to understand your database schema,
    model relationships, and business terminology for accurate natural language
    query translation.
  README
end
|
1084
|
+
end
|
1085
|
+
# rubocop:enable Metrics/ClassLength
|
1086
|
+
end
|
1087
|
+
end
|