code_to_query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1087 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Context::Builder inspects the DB and app to produce a context pack file.
4
+
5
require 'fileutils'
require 'json'

begin
  require 'active_record'
rescue LoadError
  # ActiveRecord is optional; callers check defined?(ActiveRecord::Base).
end
11
+
12
+ module CodeToQuery
13
+ module Context
14
+ # rubocop:disable Metrics/ClassLength
15
+ class Builder
16
# Class-level shortcut: instantiates a builder with the default
# configuration and runs the instance-level #bootstrap!.
#
# @return [Object] whatever the instance-level #bootstrap! returns
def self.bootstrap!
  builder = new
  builder.bootstrap!
end
19
+
20
# Stores the configuration object used by every extraction step.
#
# @param config [Object] configuration; defaults to CodeToQuery.config
def initialize(config = CodeToQuery.config)
  @config = config
end
23
+
24
+ # Build a full pack and write it to disk
25
# Builds a complete context pack (schema, models, glossary, policies,
# hints), writes it via write_pack and returns it.
#
# The schema is extracted once up front. The app is always scanned
# afterwards; if the first extraction reported zero tables, the schema is
# extracted a second time after the scan.
#
# @return [Pack] the pack that was written
def bootstrap!
  count_tables = ->(data) { data[:tables]&.length || data['tables']&.length || 0 }

  schema_data = extract_schema
  initial_count = count_tables.call(schema_data)
  @config.logger.info("[code_to_query] Schema data structure: #{schema_data.keys} with #{initial_count} tables")

  models_data = scan_app
  # Optional static (regex based) enrichment of the runtime scan.
  models_data = deep_merge_models(models_data, static_scan_app) if @config.prefer_static_scan

  if initial_count.to_i.zero?
    schema_data = extract_schema
    retry_count = count_tables.call(schema_data)
    @config.logger.info("[code_to_query] Retried schema extraction after app scan: #{retry_count} tables")
  end

  glossary = enrich_glossary_with_llm(generate_glossary(schema_data), schema_data, models_data)
  policies = collect_policies
  join_hints = extract_join_hints(schema_data)

  Pack.new(
    schema: schema_data,
    models: models_data,
    glossary: glossary,
    policies: policies,
    hints: { performance: [], joins: join_hints }
  ).tap { |pack| write_pack(pack) }
end
54
+
55
+ # --- Components (stubs that won't crash) ---
56
+
57
# Reads tables, columns, indexes, foreign keys and constraints from the
# database behind ActiveRecord::Base.
#
# Up to three connection attempts are made, pausing 0.1s between them.
# Any failure is logged and reported through an :error key instead of
# being raised.
#
# @return [Hash] { tables:, version:, adapter: } plus :error on failure
def extract_schema
  return { tables: [], version: 'unknown', adapter: 'none' } unless defined?(ActiveRecord::Base)

  max_attempts = 3
  connection = nil

  1.upto(max_attempts) do |attempt|
    break if connection

    begin
      # Touching the connection forces Rails to establish it.
      ActiveRecord::Base.connection

      if ActiveRecord::Base.connected?
        connection = ActiveRecord::Base.connection
        @config.logger.info("[code_to_query] Connected to database with adapter: #{connection.adapter_name} (attempt #{attempt})")
        break
      end

      @config.logger.info("[code_to_query] Database not connected on attempt #{attempt}")
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Connection attempt #{attempt} failed: #{e.message}")
    end

    sleep(0.1) if attempt < max_attempts
  end

  if connection.nil?
    error_msg = "Failed to establish database connection after #{max_attempts} attempts"
    @config.logger.warn("[code_to_query] #{error_msg}")
    return { tables: [], version: 'unknown', adapter: @config.adapter.to_s, error: error_msg }
  end

  tables = list_tables(connection)
  if tables.any?
    @config.logger.info("[code_to_query] Found #{tables.length} tables: #{tables.join(', ')}")
  elsif tables.empty?
    @config.logger.info('[code_to_query] No tables found')
  end

  table_entries = tables.map do |table_name|
    {
      name: table_name,
      columns: extract_table_columns(connection, table_name),
      indexes: extract_table_indexes(connection, table_name),
      foreign_keys: extract_foreign_keys(connection, table_name),
      constraints: extract_table_constraints(connection, table_name)
    }
  end
  result = {
    tables: table_entries,
    version: extract_schema_version(connection),
    adapter: connection.adapter_name.downcase
  }

  @config.logger.info("[code_to_query] Schema extraction completed with #{result[:tables].length} tables")
  result
rescue StandardError => e
  @config.logger.warn("[code_to_query] Schema extraction failed: #{e.message}")
  @config.logger.warn("[code_to_query] Backtrace: #{e.backtrace.first(5).join("\n")}")
  { tables: [], version: 'unknown', adapter: @config.adapter.to_s, error: e.message }
end
119
+
120
# Inspects loaded ActiveRecord models and collects per-model metadata.
#
# Models are eager-loaded through Rails when available. Each model is
# processed in isolation: a model that raises (for example while checking
# its table) is logged and skipped instead of discarding the whole scan.
# Anonymous classes (nil name) and abstract classes are skipped.
#
# @return [Hash] { models:, associations:, validations:, scopes: } keyed by
#   model name; includes :error when the scan as a whole failed
def scan_app
  models = {}
  associations = {}
  validations = {}
  scopes = {}

  if defined?(ActiveRecord::Base)
    # Ensure models are loaded so descendants is populated
    if defined?(Rails) && Rails.respond_to?(:application)
      begin
        Rails.application.eager_load!
      rescue StandardError => e
        @config.logger.warn("[code_to_query] Eager load failed: #{e.message}")
      end
    end

    ActiveRecord::Base.descendants.each do |model|
      model_name = nil
      begin
        model_name = model.name
        next if model_name.nil?
        next if model.respond_to?(:abstract_class?) && model.abstract_class?
        next unless model.table_exists?

        summary = {
          table_name: model.table_name,
          primary_key: model.primary_key,
          inheritance_column: model.inheritance_column,
          timestamps: has_timestamps?(model),
          soft_delete: has_soft_delete?(model),
          enums: extract_model_enums(model)
        }
        model_associations = extract_model_associations(model)
        model_validations = extract_model_validations(model)
        model_scopes = extract_model_scopes(model)

        # Assign only once everything succeeded so the four maps stay in sync.
        models[model_name] = summary
        associations[model_name] = model_associations
        validations[model_name] = model_validations
        scopes[model_name] = model_scopes
      rescue StandardError => e
        @config.logger.warn("[code_to_query] Skipping model #{model_name || 'anonymous model'}: #{e.message}")
      end
    end
  end

  {
    models: models,
    associations: associations,
    validations: validations,
    scopes: scopes
  }
rescue StandardError => e
  @config.logger.warn("[code_to_query] App scanning failed: #{e.message}")
  { models: {}, associations: {}, validations: {}, scopes: {}, error: e.message }
end
166
+
167
# Builds a synonym glossary from the schema and merges in the business
# glossary loaded from disk.
#
# Table synonyms are stored under the table name, column synonyms under
# "table.column". Entries with no synonyms are omitted.
#
# @param existing_schema [Hash, nil] previously extracted schema; when nil
#   the schema is extracted again
# @return [Hash] the glossary, or { error: message } on failure
def generate_glossary(existing_schema = nil)
  schema = existing_schema || extract_schema
  tables = schema[:tables] || schema['tables'] || []

  glossary = tables.each_with_object({}) do |table, entries|
    table_name = table[:name] || table['name']

    table_synonyms = generate_table_synonyms(table_name)
    entries[table_name] = table_synonyms if table_synonyms.any?

    Array(table[:columns] || table['columns']).each do |column|
      column_name = column[:name] || column['name']
      sql_type = column[:sql_type] || column['sql_type']
      column_synonyms = generate_column_synonyms(column_name, sql_type)
      next unless column_synonyms.any?

      entries["#{table_name}.#{column_name}"] = column_synonyms
    end
  end

  # Business-specific entries win over auto-generated ones.
  glossary.merge!(load_business_glossary)
rescue StandardError => e
  @config.logger.warn("[code_to_query] Glossary generation failed: #{e.message}")
  { error: e.message }
end
200
+
201
# Gathers access policies from the configured policy adapter and from
# extract_authorization_policies.
#
# The adapter is invoked with nil as the user context; its result is used
# as :enforced_predicates only when it is a Hash. Adapter failures are
# logged and do not abort collection.
#
# @return [Hash] policy sections; on failure a hash with :error
def collect_policies
  sections = %i[enforced_predicates column_access row_level_security audit_requirements]
  policies = sections.each_with_object({}) { |section, acc| acc[section] = {} }

  adapter = @config.policy_adapter
  if adapter.respond_to?(:call)
    begin
      user_policies = adapter.call(nil)
      policies[:enforced_predicates] = user_policies if user_policies.is_a?(Hash)
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Policy collection failed: #{e.message}")
    end
  end

  # Policies discovered from the app override the defaults above.
  policies.merge!(extract_authorization_policies)
rescue StandardError => e
  @config.logger.warn("[code_to_query] Policy collection failed: #{e.message}")
  { enforced_predicates: {}, error: e.message }
end
228
+
229
# Checks that the context pack file exists and contains a schema.tables
# array.
#
# Structural problems (top-level JSON that is not an object, or a "schema"
# value that is not an object) are reported with the same
# "missing schema.tables" error rather than leaking a NoMethodError or
# TypeError from Hash#dig.
#
# @return [true] when the pack is valid
# @raise [RuntimeError] when the file is missing or lacks schema.tables
# @raise [JSON::ParserError] when the file is not valid JSON
def verify!
  path = @config.context_pack_path.to_s
  raise "Context pack not found at #{path}" unless File.exist?(path)

  pack = JSON.parse(File.read(path))
  schema = pack.is_a?(Hash) ? pack['schema'] : nil
  tables = schema.is_a?(Hash) ? schema['tables'] : nil
  raise 'Context pack missing schema.tables' unless tables.is_a?(Array)

  true
end
238
+
239
+ private
240
+
241
+ # Lightweight static scan using regex heuristics to avoid runtime execution.
242
+ # If parser/rubocop-ast is available, we could replace these regexes with AST parsing.
243
# Regex-based scan of the Ruby files under the configured static scan
# directories. No application code is executed.
#
# Per file it records: `enum name: { key: int }` declarations, integer
# constant maps whose name points at a `*_type` / `*_status` column
# mentioned in the file, and `scope :name, -> { ... }` bodies.
#
# @return [Hash] { models:, associations:, validations:, scopes: }; all
#   sections are empty when the scan fails
def static_scan_app
  result = { models: {}, associations: {}, validations: {}, scopes: {} }

  # Turns "video: 0, image: 1" into { "video" => 0, "image" => 1 }.
  parse_pairs = lambda do |body|
    body.split(',').each_with_object({}) do |pair, mapping|
      m = pair.strip.match(/([a-zA-Z0-9_]+):\s*(\d+)/)
      mapping[m[1]] = m[2].to_i if m
    end
  end

  Array(@config.static_scan_dirs).compact.each do |dir|
    next unless Dir.exist?(dir)

    Dir.glob(File.join(dir, '**/*.rb')).each do |file|
      content = begin
        File.read(file)
      rescue StandardError
        nil
      end
      next unless content

      model_name = infer_model_name_from_path(file)
      next unless model_name

      result[:models][model_name] ||= { table_name: nil, primary_key: 'id', inheritance_column: 'type', timestamps: true, soft_delete: false, enums: {} }
      result[:scopes][model_name] ||= {}
      enums = result[:models][model_name][:enums]
      scopes = result[:scopes][model_name]

      # enum lines: enum attachment_type: { video: 0, image: 1 }
      content.scan(/\benum\s+([a-zA-Z0-9_]+):\s*\{([^}]+)\}/).each do |(col, body)|
        mapping = parse_pairs.call(body)
        next if mapping.empty?

        (enums[col] ||= {}).merge!(mapping)
      end

      # constant maps: ATTACHMENT_TYPES = { video: 0, image: 1 }
      content.scan(/([A-Z][A-Z0-9_]+)\s*=\s*\{([^}]+)\}/).each do |(const_name, body)|
        mapping = parse_pairs.call(body)
        next if mapping.empty?

        base = const_name.downcase
                         .sub(/_types\z/, '').sub(/_type\z/, '')
                         .sub(/_statuses\z/, '').sub(/_status\z/, '')
                         .sub(/_kinds\z/, '').sub(/_kind\z/, '')
        col = ["#{base}_type", "#{base}_status"].find { |candidate| content.include?(candidate) }
        next unless col

        (enums[col] ||= {}).merge!(mapping)
      end

      # scopes: scope :with_videos, -> { where(attachment_type: 0) }
      content.scan(/scope\s+:([a-zA-Z0-9_]+),\s*->\s*\{([^}]+)\}/m).each do |(name, body)|
        summary = body.strip.gsub(/\s+/, ' ')
        scopes[name] = { type: 'scope', arity: -1, where: summary[0..200] }
      end
    end
  end

  result
rescue StandardError => e
  @config.logger.warn("[code_to_query] Static scan failed: #{e.message}")
  { models: {}, associations: {}, validations: {}, scopes: {} }
end
306
+
307
# Merges the static-scan result into the runtime-scan result without
# mutating either input.
#
# The copy is made by walking Hashes and Arrays rather than through
# Marshal: Marshal.dump raises on Procs (e.g. validator options), and the
# rescue below would then silently drop all static data. Leaf objects are
# shared, not duplicated.
#
# For each of :models, :associations, :validations and :scopes, entries
# from +extra+ are merged per model name; nested hashes are merged one
# further level and all other values from +extra+ win.
#
# @param primary [Hash] runtime scan result
# @param extra [Hash, nil] static scan result
# @return [Hash] merged copy, or +primary+ itself if merging fails
def deep_merge_models(primary, extra)
  copy = lambda do |value|
    case value
    when Hash then value.each_with_object({}) { |(k, v), acc| acc[k] = copy.call(v) }
    when Array then value.map { |v| copy.call(v) }
    else value
    end
  end

  merged = copy.call(primary)
  additions = extra || {}

  %i[models associations validations scopes].each do |section|
    merged[section] ||= {}
    (additions[section] || {}).each do |name, incoming|
      current = merged[section][name]
      merged[section][name] =
        if incoming.is_a?(Hash) && current.is_a?(Hash)
          current.merge(incoming) { |_key, a, b| a.is_a?(Hash) && b.is_a?(Hash) ? a.merge(b) : b }
        else
          incoming
        end
    end
  end

  merged
rescue StandardError
  primary
end
323
+
324
# Derives a CamelCase model name from a Ruby file path, e.g.
# app/models/line_item.rb -> "LineItem". Only the file's base name is
# considered; directories do not contribute a namespace.
#
# @param path [String] path to a .rb file
# @return [String, nil] the inferred name, or nil for an empty base name
def infer_model_name_from_path(path)
  stem = File.basename(path, '.rb')
  return nil if stem.empty?

  words = stem.split('/').last.split('_')
  words.map { |word| word.capitalize }.join
end
331
+
332
# Produces join hints for every column whose name ends in "_id".
#
# When the table entry carries a declared foreign key for the column
# (:foreign_keys with :column / :to_table), its target table is used.
# Otherwise the target is guessed by replacing "_id" with "s". Columns
# without a name are skipped instead of aborting the whole extraction.
#
# @param schema [Hash] schema hash with :tables / "tables"
# @return [Array<Hash>] hints shaped { from:, column:, to: }; [] on failure
def extract_join_hints(schema)
  tables = Array(schema[:tables] || schema['tables'] || [])

  tables.flat_map do |table|
    from = table[:name] || table['name']

    declared = Array(table[:foreign_keys] || table['foreign_keys']).each_with_object({}) do |fk, targets|
      column = fk[:column] || fk['column']
      target = fk[:to_table] || fk['to_table']
      targets[column.to_s] = target.to_s if column && target
    end

    hints = Array(table[:columns] || table['columns']).map do |column|
      name = (column[:name] || column['name']).to_s
      next unless name.end_with?('_id')

      { from: from, column: name, to: declared[name] || name.sub(/_id\z/, 's') }
    end
    hints.compact
  end
rescue StandardError
  []
end
350
+
351
# Lists table names for the given connection.
#
# Strategy, in order: the adapter's data_sources / tables helper, an
# adapter-specific catalog query (PostgreSQL, MySQL, SQLite), and finally
# parsing db/schema.rb. Each step is attempted only if the previous one
# produced nothing; failures are logged and never raised from the
# individual fallbacks.
#
# The MySQL fallback accepts both hash rows and array rows, and the SQLite
# fallback excludes SQLite's internal sqlite_* tables.
#
# @param connection [Object] ActiveRecord-style connection
# @return [Array<String>] table names (possibly empty)
def list_tables(connection)
  adapter_name = connection.adapter_name.downcase
  @config.logger.info("[code_to_query] Detecting tables for adapter: #{adapter_name}")

  # Try Rails helpers first
  names = begin
    if connection.respond_to?(:data_sources)
      found = Array(connection.data_sources)
      @config.logger.info("[code_to_query] Using data_sources method, found #{found.length} tables")
    else
      found = Array(connection.tables)
      @config.logger.info("[code_to_query] Using tables method, found #{found.length} tables")
    end
    found
  rescue StandardError => e
    @config.logger.warn("[code_to_query] Rails helpers failed: #{e.message}")
    []
  end
  return names.uniq if names.any?

  # Fallback by adapter - use actual adapter name from connection
  case adapter_name
  when 'postgresql'
    @config.logger.info('[code_to_query] Trying PostgreSQL specific queries')
    # First try with search path
    begin
      search_path = connection.respond_to?(:schema_search_path) ? connection.schema_search_path.to_s : 'public'
      schemas = search_path.split(',').map { |s| s.strip.gsub('"', '') }
      @config.logger.info("[code_to_query] Using schemas: #{schemas.join(', ')}")
      sql = <<~SQL
        SELECT schemaname, tablename
        FROM pg_tables
        WHERE schemaname = ANY (ARRAY[#{schemas.map { |s| connection.quote(s) }.join(', ')}])
      SQL
      result = connection.execute(sql)
      pg_names = result.map { |r| r['tablename'] || r[:tablename] }.compact.uniq
      @config.logger.info("[code_to_query] Found #{pg_names.length} tables via pg_tables: #{pg_names.join(', ')}")
      return pg_names if pg_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] pg_tables query failed: #{e.message}")
    end

    # Fallback to information_schema
    begin
      info = connection.execute(<<~SQL)
        SELECT table_schema, table_name
        FROM information_schema.tables
        WHERE table_type = 'BASE TABLE'
          AND table_schema NOT IN ('pg_catalog','information_schema')
      SQL
      info_names = info.map { |r| r['table_name'] || r[:table_name] }.compact.uniq
      @config.logger.info("[code_to_query] Found #{info_names.length} tables via information_schema: #{info_names.join(', ')}")
      return info_names if info_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] information_schema query failed: #{e.message}")
    end
  when 'mysql2', 'mysql'
    @config.logger.info('[code_to_query] Trying MySQL specific queries')
    begin
      result = connection.execute("SHOW FULL TABLES WHERE Table_type = 'BASE TABLE'")
      # Rows may be hashes or plain arrays depending on the driver; the
      # table name is the first column either way.
      mysql_names = result.map { |r| r.respond_to?(:values) ? r.values.first : Array(r).first }.compact.uniq
      @config.logger.info("[code_to_query] Found #{mysql_names.length} tables via SHOW TABLES: #{mysql_names.join(', ')}")
      return mysql_names if mysql_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] SHOW TABLES query failed: #{e.message}")
    end
  when 'sqlite3', 'sqlite'
    @config.logger.info('[code_to_query] Trying SQLite specific queries')
    begin
      result = connection.execute("SELECT name FROM sqlite_master WHERE type='table'")
      sqlite_names = result.map { |r| r['name'] || r[:name] }.compact.uniq
      # sqlite_sequence and friends are bookkeeping tables, not app data.
      sqlite_names = sqlite_names.reject { |n| n.to_s.start_with?('sqlite_') }
      @config.logger.info("[code_to_query] Found #{sqlite_names.length} tables via sqlite_master: #{sqlite_names.join(', ')}")
      return sqlite_names if sqlite_names.any?
    rescue StandardError => e
      @config.logger.warn("[code_to_query] sqlite_master query failed: #{e.message}")
    end
  else
    @config.logger.info("[code_to_query] Unknown adapter '#{adapter_name}', trying generic methods")
  end

  # Last resort: parse db/schema.rb if present
  @config.logger.info('[code_to_query] Trying to parse db/schema.rb as last resort')
  parsed = parse_schema_rb
  if parsed.any?
    @config.logger.info("[code_to_query] Found #{parsed.length} tables in schema.rb: #{parsed.join(', ')}")
    return parsed
  end

  @config.logger.info('[code_to_query] No tables found through any method')
  []
end
442
+
443
# Extracts table names from db/schema.rb under Rails.root.
#
# Returns [] when Rails is not loaded, when Rails.root is unavailable
# (nil), when the file does not exist, or when reading/parsing fails.
# Table names written as "name", 'name' or :name are recognised.
#
# @return [Array<String>] unique table names in file order
def parse_schema_rb
  return [] unless defined?(Rails) && Rails.respond_to?(:root) && Rails.root

  schema_path = File.join(Rails.root.to_s, 'db', 'schema.rb')
  unless File.exist?(schema_path)
    @config.logger.info("[code_to_query] schema.rb not found at #{schema_path}")
    return []
  end

  content = File.read(schema_path)
  # Match lines like: create_table "table_name", force: :cascade do |t|
  tables = content.scan(/create_table\s+(?:"([^"]+)"|'([^']+)'|:(\w+))/).flatten.compact.uniq
  @config.logger.info("[code_to_query] Parsed #{tables.length} table names from schema.rb: #{tables.join(', ')}")
  tables
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to parse schema.rb: #{e.message}")
  []
end
463
+
464
# Describes every column of a table.
#
# @param connection [Object] connection responding to primary_key and columns
# @param table_name [String]
# @return [Array<Hash>] one hash per column (name, sql_type, type, null,
#   default, primary, auto_increment, comment); [] when extraction fails
def extract_table_columns(connection, table_name)
  pk_name = connection.primary_key(table_name)

  connection.columns(table_name).map do |column|
    primary = column.name == pk_name
    description = {
      name: column.name,
      sql_type: column.sql_type,
      type: column.type,
      null: column.null,
      default: column.default,
      primary: primary
    }
    description[:auto_increment] = determine_auto_increment(column, connection: connection, is_primary: primary)
    description[:comment] = extract_column_comment(connection, table_name, column.name)
    description
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract columns for #{table_name}: #{e.message}")
  []
end
485
+
486
# Decides whether a column is auto-incrementing.
#
# All adapter predicates the column supports (auto_increment?, serial?,
# identity?) are consulted and the column counts as auto-incrementing if
# any of them is true. Previously the first supported predicate decided,
# so an identity column whose serial? was false was misreported.
#
# Without predicates, the SQL type (serial / identity / auto_increment),
# a nextval( default, and the SQLite "INTEGER PRIMARY KEY" rule are used.
#
# @param column [Object] column responding to sql_type and default
# @param connection [Object, nil] only its presence is checked (SQLite rule)
# @param is_primary [Boolean] whether the column is the primary key
# @return [Boolean] false when detection itself fails
def determine_auto_increment(column, connection: nil, is_primary: false)
  predicates = %i[auto_increment? serial? identity?].select { |name| column.respond_to?(name) }
  return predicates.any? { |name| column.public_send(name) } unless predicates.empty?

  # Fallback: check based on sql_type and database-specific patterns
  return false if column.sql_type.nil?

  sql_type = column.sql_type.downcase
  # PostgreSQL serial/bigserial/identity, MySQL auto_increment
  return true if sql_type.match?(/serial|identity|auto_increment/)

  # Sequence-backed default (PostgreSQL)
  return true if column.default.to_s.match?(/nextval\(/i)

  # SQLite special case: INTEGER PRIMARY KEY is auto-increment
  if connection && @config.adapter == :sqlite && is_primary && sql_type == 'integer'
    true
  else
    false
  end
rescue StandardError
  false
end
518
+
519
# Describes the indexes of a table.
#
# @param connection [Object] connection responding to indexes(table_name)
# @param table_name [String]
# @return [Array<Hash>] hashes with name, columns, unique, partial and
#   type ('btree' when the index reports none); [] when extraction fails
def extract_table_indexes(connection, table_name)
  connection.indexes(table_name).map do |index|
    predicate = index.try(:where)
    kind = index.try(:type)

    {
      name: index.name,
      columns: index.columns,
      unique: index.unique,
      partial: predicate.present?,
      type: kind || 'btree'
    }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract indexes for #{table_name}: #{e.message}")
  []
end
533
+
534
# Describes the foreign keys declared on a table.
#
# @param connection [Object] connection; adapters that do not respond to
#   foreign_keys yield []
# @param table_name [String]
# @return [Array<Hash>] hashes with name, column, to_table, primary_key,
#   on_delete and on_update; [] when extraction fails
def extract_foreign_keys(connection, table_name)
  return [] unless connection.respond_to?(:foreign_keys)

  attributes = %i[name column to_table primary_key on_delete on_update]
  connection.foreign_keys(table_name).map do |fk|
    attributes.each_with_object({}) { |attribute, row| row[attribute] = fk.public_send(attribute) }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract foreign keys for #{table_name}: #{e.message}")
  []
end
553
+
554
# Returns the CHECK constraints of a table (PostgreSQL only).
#
# The table name is passed through the connection's identifier and
# literal quoting before being cast to regclass, so names containing
# quotes or upper-case characters neither break nor alter the query.
# Failures are logged and yield [].
#
# @param connection [Object] connection responding to quote,
#   quote_table_name and execute
# @param table_name [String]
# @return [Array<Hash>] hashes shaped { name:, type: 'check', definition: }
def extract_table_constraints(connection, table_name)
  # Check constraints are only queried for PostgreSQL.
  return [] unless @config.adapter == :postgres

  relation = connection.quote(connection.quote_table_name(table_name))
  rows = connection.execute(<<~SQL)
    SELECT conname, pg_get_constraintdef(oid) AS definition
    FROM pg_constraint
    WHERE conrelid = #{relation}::regclass
      AND contype = 'c'
  SQL

  rows.map do |row|
    { name: row['conname'], type: 'check', definition: row['definition'] }
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract constraints for #{table_name}: #{e.message}")
  []
end
584
+
585
# Fetches the database comment attached to a column (PostgreSQL and
# MySQL only; nil for other adapters).
#
# Table and column names are quoted through the connection rather than
# interpolated raw. On PostgreSQL the table is resolved with a regclass
# cast, so the lookup targets the one table the name resolves to.
# select_value is preferred when the connection offers it, which avoids
# depending on the driver's row shape.
#
# @param connection [Object] ActiveRecord-style connection
# @param table_name [String]
# @param column_name [String]
# @return [String, nil] the comment, or nil when absent or on any error
def extract_column_comment(connection, table_name, column_name)
  sql =
    case @config.adapter
    when :postgres
      relation = connection.quote(connection.quote_table_name(table_name))
      <<~SQL
        SELECT col_description(pga.attrelid, pga.attnum) AS comment
        FROM pg_attribute pga
        WHERE pga.attrelid = #{relation}::regclass
          AND pga.attname = #{connection.quote(column_name)}
      SQL
    when :mysql
      <<~SQL
        SELECT COLUMN_COMMENT AS comment
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = DATABASE()
          AND TABLE_NAME = #{connection.quote(table_name)}
          AND COLUMN_NAME = #{connection.quote(column_name)}
      SQL
    end
  return nil unless sql

  if connection.respond_to?(:select_value)
    connection.select_value(sql)
  else
    row = connection.execute(sql).first
    row.respond_to?(:fetch) && row.respond_to?(:key?) ? row.fetch('comment', nil) : Array(row).first
  end
rescue StandardError
  nil
end
609
+
610
# Returns the database server version string for the configured adapter
# (:postgres, :mysql or :sqlite).
#
# select_value is used when the connection provides it; otherwise the
# first column of the first row is read, whether the driver returns rows
# as hashes or as arrays.
#
# @param connection [Object] ActiveRecord-style connection
# @return [String] the version, or 'unknown' for other adapters, empty
#   results, or errors
def extract_schema_version(connection)
  sql =
    case @config.adapter
    when :postgres, :mysql then 'SELECT version()'
    when :sqlite then 'SELECT sqlite_version()'
    end
  return 'unknown' unless sql

  version =
    if connection.respond_to?(:select_value)
      connection.select_value(sql)
    else
      row = connection.execute(sql).first
      row.respond_to?(:values) ? row.values.first : Array(row).first
    end

  version || 'unknown'
rescue StandardError
  'unknown'
end
624
+
625
# @param model [Object] object responding to column_names
# @return [Boolean] true when both created_at and updated_at columns exist
def has_timestamps?(model)
  %w[created_at updated_at].all? { |column| model.column_names.include?(column) }
end
628
+
629
# A model counts as soft-deleting when it has a deleted_at column or
# responds to paranoid? with a truthy value.
#
# @param model [Object] object responding to column_names
# @return [Boolean, Object] true for a deleted_at column; otherwise the
#   result of the paranoid? check
def has_soft_delete?(model)
  return true if model.column_names.include?('deleted_at')

  model.respond_to?(:paranoid?) && model.paranoid?
end
633
+
634
# Describes a model's associations.
#
# Associations whose target class cannot be resolved are skipped.
# Polymorphic associations have no single target class, so they are
# exempt from that check (previously they were always dropped) and are
# reported with nil :class_name and :primary_key.
#
# @param model [Object] model responding to reflect_on_all_associations
# @return [Hash] association name => { type:, class_name:, foreign_key:,
#   primary_key:, through:, dependent:, polymorphic:, as: }; {} on failure
def extract_model_associations(model)
  model.reflect_on_all_associations.each_with_object({}) do |assoc, found|
    polymorphic = nil

    begin
      polymorphic = assoc.options[:polymorphic]
      # Resolving the constant verifies the target class exists.
      assoc.class_name.constantize unless polymorphic
    rescue NameError
      @config.logger.info("[code_to_query] Skipping association #{assoc.name} for #{model.name}: class #{assoc.class_name} not found")
      next
    rescue StandardError => e
      @config.logger.info("[code_to_query] Skipping association #{assoc.name} for #{model.name}: #{e.message}")
      next
    end

    begin
      found[assoc.name] = {
        type: assoc.macro,
        class_name: polymorphic ? nil : assoc.class_name,
        foreign_key: assoc.foreign_key,
        primary_key: polymorphic ? nil : assoc.association_primary_key,
        through: assoc.options[:through],
        dependent: assoc.options[:dependent],
        polymorphic: polymorphic,
        as: assoc.options[:as]
      }
    rescue StandardError => e
      @config.logger.info("[code_to_query] Skipping problematic association #{assoc.name} for #{model.name}: #{e.message}")
      next
    end
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract associations for #{model.name}: #{e.message}")
  {}
end
673
+
674
# Groups a model's validators by the attribute they apply to.
#
# @param model [Object] model responding to validators
# @return [Hash] attribute => array of { type: validator class name,
#   options: validator options }; {} when extraction fails
def extract_model_validations(model)
  model.validators.each_with_object({}) do |validator, by_attribute|
    validator.attributes.each do |attribute|
      rules = (by_attribute[attribute] ||= [])
      rules << { type: validator.class.name, options: validator.options }
    end
  end
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract validations for #{model.name}: #{e.message}")
  {}
end
692
+
693
# Collects enum-like mappings for a model from three sources:
#
# 1. the model's defined_enums, when it responds to that;
# 2. Hash constants on the model whose name (minus a _TYPE(S)/_STATUS(ES)/
#    _KIND(S) suffix) matches a "<base>_type" or "<base>_status" column;
# 3. class methods named after the plural of a column that return such a
#    Hash.
#
# A Hash qualifies when it is non-empty, its keys are Strings/Symbols and
# its values are Integers or all-digit Strings. Digit strings are
# validated with \A...\z (whole string) and parsed as base 10, so "010"
# is 10 rather than octal 8. A failure in one constant or column skips
# only that item.
#
# @param model [Class] model class
# @return [Hash] column/enum name => { label => Integer }
def extract_model_enums(model)
  enums = {}

  enum_like = lambda do |value|
    value.is_a?(Hash) && !value.empty? &&
      value.keys.all? { |k| k.is_a?(String) || k.is_a?(Symbol) } &&
      value.values.all? { |v| v.is_a?(Integer) || (v.is_a?(String) && v.match?(/\A\d+\z/)) }
  end
  normalize = lambda do |value|
    value.each_with_object({}) do |(label, raw), mapping|
      mapping[label.to_s] = raw.is_a?(Integer) ? raw : Integer(raw, 10)
    end
  end

  begin
    # 1) Native Rails enums
    model.defined_enums.each { |name, mapping| enums[name] = mapping } if model.respond_to?(:defined_enums)

    column_names = begin
      model.column_names
    rescue StandardError
      []
    end

    # 2) Infer from mapping constants like ATTACHMENT_TYPES = { 'video'=>0, 'image'=>1 }
    model.constants(false).each do |const_name|
      begin
        value = model.const_get(const_name)
        next unless enum_like.call(value)

        base = const_name.to_s.downcase
                         .sub(/_types\z/, '').sub(/_type\z/, '')
                         .sub(/_statuses\z/, '').sub(/_status\z/, '')
                         .sub(/_kinds\z/, '').sub(/_kind\z/, '')
        candidates = %w[type status].map do |suffix|
          base.end_with?('_') ? "#{base}#{suffix}" : "#{base}_#{suffix}"
        end
        matched_column = candidates.find { |candidate| column_names.include?(candidate) }
        next unless matched_column

        # Merge/augment if Rails enum already present for the same column
        enums[matched_column] = (enums[matched_column] || {}).merge(normalize.call(value))
      rescue StandardError
        next
      end
    end

    # 3) Fallback via class methods named after the pluralized column
    column_names.each do |col|
      begin
        plural_method = ''.respond_to?(:pluralize) ? col.to_s.pluralize : "#{col}s"
        next unless model.respond_to?(plural_method)

        mapping = model.public_send(plural_method)
        next unless enum_like.call(mapping)

        enums[col.to_s] = (enums[col.to_s] || {}).merge(normalize.call(mapping))
      rescue StandardError
        next
      end
    end
  rescue StandardError => e
    @config.logger.warn("[code_to_query] Failed to extract enums for #{model.name}: #{e.message}")
  end

  enums
end
767
+
768
# Discovers a model's scopes.
#
# Names are first taken from whichever registry the model exposes
# (scope_registry, scopes or defined_scopes). Remaining singleton methods
# that take no required arguments are then invoked, and those returning
# an ActiveRecord::Relation are recorded as scopes. Finally each scope is
# called to capture a sample SQL string and a summary of its WHERE clause.
#
# NOTE(review): the probing step calls arbitrary class methods with no
# arguments; a class method with side effects would be executed here.
#
# @param model [Class] model class
# @return [Hash] scope name => { type:, arity:, sample_sql:, where: };
#   {} when extraction fails
def extract_model_scopes(model)
  scopes = {}
  registry_names = []
  register = lambda do |name, arity|
    key = name.to_s
    registry_names << key
    scopes[key] = { type: 'scope', arity: arity }
  end

  # Prefer explicit registries if available
  if model.respond_to?(:scope_registry) && model.scope_registry.respond_to?(:each)
    model.scope_registry.each do |name, body|
      register.call(name, body.respond_to?(:arity) ? body.arity : 0)
    end
  elsif model.respond_to?(:scopes)
    model.scopes.each { |name, scope_proc| register.call(name, scope_proc&.arity || 0) }
  elsif model.respond_to?(:defined_scopes)
    model.defined_scopes.each_key { |name| register.call(name, 0) }
  end

  # Fallback: probe singleton methods that look like scopes and return a Relation
  begin
    disallow = %w[new create update delete destroy find where order limit select joins includes preload eager_load pluck first last all none not or count sum average minimum maximum]

    model.singleton_methods(false).each do |method_id|
      method_name = method_id.to_s
      next unless method_name.match?(/\A[a-z_][a-zA-Z0-9_]*\z/)
      next if disallow.include?(method_name) || registry_names.include?(method_name)

      callable = model.method(method_id)
      arity = callable.arity
      # Only try zero-arg or optional-arg methods
      next if arity.positive?

      outcome = nil
      begin
        outcome = callable.call
      rescue StandardError
        next
      end

      next unless defined?(ActiveRecord::Relation) && outcome.is_a?(ActiveRecord::Relation)

      scopes[method_name] ||= { type: 'scope', arity: arity }
    end
  rescue StandardError
    # ignore
  end

  # Enhance scope entries with sample SQL and a compact where summary
  scopes.each_key do |name|
    begin
      relation = model.public_send(name)
      next unless defined?(ActiveRecord::Relation) && relation.is_a?(ActiveRecord::Relation)

      sql = relation.limit(1).to_sql
      scopes[name][:sample_sql] = truncate(sql, 500)
      where = extract_where_clause(sql)
      scopes[name][:where] = truncate(where, 200) if where
    rescue StandardError
      # skip scopes that error out
    end
  end

  scopes
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to extract scopes for #{model.name}: #{e.message}")
  {}
end
837
+
838
# Pulls the WHERE condition out of a SQL string, up to the first
# ORDER BY / LIMIT / GROUP BY or the end of the string, with runs of
# whitespace collapsed to single spaces.
#
# The pattern is matched in multi-line mode so a condition spread across
# several lines is captured; without it such SQL never matched at all.
#
# @param sql [String, Object]
# @return [String, nil] the condition text, or nil when +sql+ is not a
#   String or contains no WHERE clause
def extract_where_clause(sql)
  return nil unless sql.is_a?(String)

  match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER\s+BY|\s+LIMIT|\s+GROUP\s+BY|\z)/im)
  return nil unless match

  match[1].gsub(/\s+/, ' ').strip
end
847
+
848
# Shortens a string to at most +max+ characters, ending in "..." when it
# had to be cut.
#
# For +max+ below 3 there is no room for the ellipsis, so the string is
# hard-cut to +max+ characters (empty for zero or negative +max+) instead
# of returning a 3-character "..." that exceeds the limit.
#
# @param str [String, Object] non-String values are returned unchanged
# @param max [Integer] maximum length of the result
# @return [String, Object]
def truncate(str, max)
  return str unless str.is_a?(String)
  return str if str.length <= max
  return str[0, [max, 0].max] if max < 3

  "#{str[0, max - 3]}..."
end
854
+
855
# Produces a naive singular/plural variant of a table name: a trailing
# "s" is dropped, otherwise an "s" is appended.
#
# Missing or empty names yield no synonyms, as does a name whose variant
# would be the empty string (e.g. "s").
#
# @param table_name [String, Symbol, nil]
# @return [Array<String>] zero or one synonym
def generate_table_synonyms(table_name)
  name = table_name.to_s
  return [] if name.empty?

  variant = name.end_with?('s') ? name.chomp('s') : "#{name}s"
  variant.empty? ? [] : [variant]
end
871
+
872
# Suggests synonyms for a column from a small built-in table, plus
# money-related terms for monetary-looking SQL types.
#
# The type-based terms are appended as individual strings; they used to
# be pushed as one nested array, leaving an Array inside the synonym
# list. A nil column name yields only type-based suggestions.
#
# @param column_name [String, Symbol, nil]
# @param sql_type [String, nil]
# @return [Array<String>] unique synonyms, possibly empty
def generate_column_synonyms(column_name, sql_type)
  name = column_name.to_s

  # Basic common column synonyms - LLM will extend these based on actual schema
  known = {
    'created_at' => %w[created_on date_created creation_date],
    'updated_at' => %w[updated_on date_updated modification_date],
    'email' => %w[email_address e_mail],
    'phone' => %w[phone_number telephone],
    'amount' => %w[total price cost value],
    'quantity' => %w[qty amount count],
    'description' => %w[desc summary details]
  }

  synonyms = Array(known[name]).dup

  # Type-based synonyms
  monetary = sql_type.to_s.match?(/money|decimal.*2|numeric.*2/)
  synonyms.concat(%w[amount price cost]) if monetary && !name.match?(/amount|price|cost/)

  synonyms.uniq
end
895
+
896
# Loads business_glossary.json from the directory that holds the context
# pack.
#
# Only a JSON object is accepted; any other top-level value (array,
# string, ...) yields {} so callers can merge the result safely. Read and
# parse failures are logged and also yield {}.
#
# @return [Hash] glossary entries, or {} when absent or unusable
def load_business_glossary
  glossary_path = File.join(File.dirname(@config.context_pack_path.to_s), 'business_glossary.json')
  return {} unless File.exist?(glossary_path)

  parsed = JSON.parse(File.read(glossary_path))
  parsed.is_a?(Hash) ? parsed : {}
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to load business glossary: #{e.message}")
  {}
end
907
+
908
# Asks the configured LLM for additional glossary entries and merges them into
# the base glossary.
#
# Enrichment is skipped (the base glossary is returned as is) unless both
# `auto_glossary_with_llm` and `openai_api_key` are set on the config. Any
# StandardError raised while building the prompt or calling the client is
# logged as a warning and the base glossary is returned.
#
# @param base_glossary [Hash] glossary built without the LLM
# @param schema_data [Hash] schema description; `tables` is read under a
#   symbol or string key
# @param models_data [Hash] model description; `scopes` is read under a
#   symbol or string key
# @return [Hash] the base glossary, or a new Hash combining it with the
#   LLM suggestions
def enrich_glossary_with_llm(base_glossary, schema_data, models_data)
  return base_glossary unless @config.auto_glossary_with_llm
  return base_glossary unless @config.openai_api_key

  # Build a compact prompt for the LLM to suggest synonyms and intent hints
  begin
    # Required here rather than at file load, as in the original code
    require 'net/http'
    require 'uri'
    require 'json'

    tables = Array(schema_data[:tables] || schema_data['tables'])
    table_summaries = tables.take(20).map do |t|
      name = t[:name] || t['name']
      cols = Array(t[:columns] || t['columns']).map { |c| c[:name] || c['name'] }
      fks = cols.select { |c| c.to_s.end_with?('_id') }
      "#{name}: cols(#{cols.take(12).join(', ')}), fks(#{fks.join(', ')})"
    end

    scopes = models_data[:scopes] || models_data['scopes'] || {}
    scope_lines = scopes.flat_map do |model_name, scope_hash|
      next [] unless scope_hash.is_a?(Hash)

      scope_hash.map do |scope_name, meta|
        # Skip malformed scope entries instead of letting one of them abort
        # the whole enrichment through the rescue below.
        next unless meta.is_a?(Hash)

        where = meta[:where] || meta['where']
        "#{model_name}.#{scope_name}: #{where}" if where
      end.compact
    end.take(40)

    system_prompt = <<~P
      You are analyzing a database schema to understand the business domain and create a comprehensive glossary.

      Your task: Infer the business domain from table names, column names, and relationships, then create mappings that help users query with natural language.

      Analyze the schema to understand:
      1. What business domain this represents (e.g. e-commerce, education, CRM, content management, etc.)
      2. What real-world entities and relationships exist
      3. How a user would naturally refer to these entities and relationships

      Create mappings for:
      - Business entities: How users refer to main concepts
      - Relationships: How users describe connections between entities (e.g., "answered by", "created by", "belongs to")
      - Actions/States: How users describe actions or states (e.g., "completed", "failed", "pending")
      - IDs and Foreign Keys: How users refer to specific entities
      - Domain-specific terms: Technical terms users might use differently than column names

      For relationship queries, create special relationship mappings:
      - Key format: "relationship_[action]_[entity]"#{' '}
      - Value: Array describing the EXISTS pattern needed

      Rules:
      - Only use tables and columns that exist in the schema
      - Infer domain from naming patterns, foreign keys, and table relationships
      - Don't assume any specific business domain
      - Create practical mappings a real user would need
      - Output JSON: {"term": ["synonym1", "synonym2"], "table.column": ["user_term1"], "relationship_action_entity": ["EXISTS pattern hint"]}

      Keep under #{@config.max_glossary_suggestions} entries total.
    P

    # Analyze foreign key relationships to understand business logic
    fk_relationships = tables.flat_map do |t|
      table_name = t[:name] || t['name']
      fks = Array(t[:foreign_keys] || t['foreign_keys'])
      fks.map do |fk|
        "#{table_name}.#{fk[:column] || fk['column']} -> #{fk[:to_table] || fk['to_table']}"
      end
    end

    user_prompt = <<~U
      DATABASE SCHEMA ANALYSIS:

      Tables and Columns:
      #{table_summaries.join("\n")}

      Foreign Key Relationships (showing business connections):
      #{fk_relationships.join("\n")}

      Model Scopes (showing common business queries):
      #{scope_lines.join("\n")}

      Current glossary has: #{base_glossary.keys.take(20).join(', ')}

      TASK: Analyze this schema and infer the business domain. Create a glossary that maps how real users would naturally refer to these entities and relationships. Focus especially on understanding what the foreign key relationships tell us about the business logic.
    U

    messages = [
      { role: 'system', content: system_prompt },
      { role: 'user', content: user_prompt }
    ]

    client = @config.llm_client || CodeToQuery::LLMClient.new(@config)
    text = client.chat(messages: messages)
    # A reply that is not valid JSON counts as "no suggestions"
    suggestions = begin
      JSON.parse(text)
    rescue StandardError
      {}
    end

    if suggestions.is_a?(Hash)
      # When a term exists on both sides with list values, keep the base
      # synonyms and append the new ones; a plain merge would discard the
      # base list. Any other value shape keeps the plain-merge behaviour.
      return base_glossary.merge(suggestions) do |_term, existing, suggested|
        existing.is_a?(Array) && suggested.is_a?(Array) ? (existing | suggested) : suggested
      end
    end
  rescue StandardError => e
    @config.logger.warn("[code_to_query] LLM glossary enrichment failed: #{e.message}")
  end

  base_glossary
end
1014
+
1015
# Collects authorization policy information for the authorization libraries
# that are loaded in the current process.
#
# @return [Hash] `:pundit` and/or `:cancan` entries for the libraries whose
#   constants are defined; an empty Hash when none is loaded or when a
#   StandardError is raised during extraction
def extract_authorization_policies
  policies = {}

  # Check for Pundit policies
  policies[:pundit] = extract_pundit_policies if defined?(Pundit)

  # Check for CanCanCan abilities
  policies[:cancan] = extract_cancan_policies if defined?(CanCan)

  policies
rescue StandardError => e
  # Reported through the configured logger, like the other diagnostics in
  # this class, rather than written straight to stderr.
  @config.logger.warn("[code_to_query] Failed to extract authorization policies: #{e.message}")
  {}
end
1029
+
1030
# Placeholder for Pundit policy extraction.
#
# Nothing is extracted yet; how to do it depends on the host application's
# setup.
#
# @return [Hash] always an empty Hash
def extract_pundit_policies
  {}
end
1035
+
1036
# Placeholder for CanCanCan ability extraction.
#
# Nothing is extracted yet; how to do it depends on the host application's
# setup.
#
# @return [Hash] always an empty Hash
def extract_cancan_policies
  {}
end
1041
+
1042
# Persists the context pack at the configured path and drops a
# human-readable README.md next to it.
#
# @param pack [#to_json] the pack to serialize
# @return [Integer] whatever the final File.write returns (bytes written
#   to README.md)
def write_pack(pack)
  pack_path = @config.context_pack_path
  target_dir = File.dirname(pack_path)

  # Make sure the destination directory exists before writing into it
  FileUtils.mkdir_p(target_dir)
  File.write(pack_path, pack.to_json)

  # Also write a human-readable version
  File.write(File.join(target_dir, 'README.md'), generate_context_readme(pack))
end
1052
+
1053
# Renders a Markdown summary of a context pack: adapter, schema version and
# one bullet list each for tables, models and glossary terms.
#
# Entries are read under either symbol or string keys, matching how the rest
# of this class reads schema and model data, and missing sections are
# rendered as empty lists instead of raising.
#
# @param pack [#schema, #models, #glossary] the pack to describe
# @return [String] Markdown text
def generate_context_readme(pack)
  # Reads `key` from a Hash under its symbol or string form; nil otherwise
  lookup = ->(hash, key) { hash.is_a?(Hash) ? (hash[key] || hash[key.to_s]) : nil }

  schema = pack.schema
  tables = Array(lookup.call(schema, :tables))
  models = lookup.call(pack.models, :models) || {}
  glossary = pack.glossary || {}

  table_lines = tables.map do |table|
    "- **#{lookup.call(table, :name)}** (#{Array(lookup.call(table, :columns)).length} columns)"
  end
  model_lines = models.map do |name, info|
    "- **#{name}** → `#{lookup.call(info, :table_name)}`"
  end
  glossary_lines = glossary.map do |term, synonyms|
    "- **#{term}**: #{Array(synonyms).join(', ')}"
  end

  <<~README
    # CodeToQuery Context Pack

    Generated on: #{Time.now}
    Database Adapter: #{lookup.call(schema, :adapter)}
    Schema Version: #{lookup.call(schema, :version)}

    ## Tables (#{tables.length})

    #{table_lines.join("\n")}

    ## Models (#{models.length})

    #{model_lines.join("\n")}

    ## Glossary Terms (#{glossary.length})

    #{glossary_lines.join("\n")}

    This context pack is used by CodeToQuery to understand your database schema,
    model relationships, and business terminology for accurate natural language
    query translation.
  README
end
1084
+ end
1085
+ # rubocop:enable Metrics/ClassLength
1086
+ end
1087
+ end