code_to_query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,289 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodeToQuery
4
+ class Planner
5
# Creates a planner bound to a configuration object.
#
# @param config [Object] stored as-is in @config; the other methods read
#   settings such as the provider, logger and limits from it.
def initialize(config)
  @config = config
end
8
+
9
# Turns a natural-language prompt into an intent hash.
#
# The provider is asked for an intent up to `planner_max_attempts` times
# (once when unset). After a failed attempt, the text produced by
# #generate_feedback is appended to the prompt for the next try.
# When every attempt fails, or anything outside the retry loop raises,
# the result of #fallback_intent is returned instead of propagating.
#
# @param prompt [String] user request
# @param schema [Hash, nil] schema or context pack; loaded from disk when nil
# @param allow_tables [Array, Object, nil] coerced with Array() and compacted
# @return [Hash] the provider intent (post-processed) or the fallback intent
def plan(prompt:, schema:, allow_tables:)
  schema ||= load_context_pack
  allow_tables = Array(allow_tables).compact

  provider = build_provider
  max_attempts = @config.planner_max_attempts || 1
  attempt = 0
  feedback = nil
  last_error = nil

  while attempt < max_attempts
    attempt += 1
    begin
      intent = provider.extract_intent(
        prompt: build_prompt_with_feedback(prompt, feedback),
        schema: schema,
        allow_tables: allow_tables
      )

      # Optional strictness pass: drop references to columns the schema does not list
      intent = strip_unknown_columns(intent, schema) if @config.planner_feedback_mode.to_s == 'schema_strict'

      # Attach provider metrics when the provider exposes them as a Hash
      intent = intent.merge('_metrics' => provider.metrics) if provider.respond_to?(:metrics) && provider.metrics.is_a?(Hash)

      # Heuristically fill params the provider left out (IDs and enum-like labels)
      return backfill_params_from_prompt(prompt, intent, schema)
    rescue StandardError => e
      last_error = e
      feedback = generate_feedback(e)
      @config.logger.warn("[code_to_query] Planning attempt #{attempt} failed: #{e.message}")
    end
  end

  @config.logger.warn("[code_to_query] Query planning failed after #{max_attempts} attempts: #{last_error&.message}")
  fallback_intent(allow_tables)
rescue StandardError => e
  @config.logger.warn("[code_to_query] Query planning failed: #{e.message}")
  fallback_intent(allow_tables)
end
55
+
56
+ private
57
+
58
# Instantiates the provider selected by `@config.provider`.
# `:openai` and `:local` map to their provider classes; any other value
# defers to #detect_best_provider.
def build_provider
  selected = @config.provider
  return Providers::OpenAI.new(@config) if :openai == selected
  return Providers::Local.new(@config) if :local == selected

  detect_best_provider
end
68
+
69
# Chooses a provider when none is configured explicitly: OpenAI when an
# API key is present and `stub_llm` is not set, otherwise the local provider.
def detect_best_provider
  use_openai = @config.openai_api_key.present? && !@config.stub_llm
  (use_openai ? Providers::OpenAI : Providers::Local).new(@config)
end
76
+
77
# Reads and parses the JSON context pack at `@config.context_pack_path`.
#
# When the file is missing and CodeToQuery::Context::Builder is defined,
# a bootstrap is attempted first; a bootstrap failure is only logged.
#
# @return [Hash] parsed pack, or {} when the file is still missing or
#   reading/parsing raises (the error is logged as a warning)
def load_context_pack
  pack_path = @config.context_pack_path

  unless File.exist?(pack_path)
    begin
      # Attempt to bootstrap a context pack automatically if missing
      CodeToQuery::Context::Builder.new(@config).bootstrap! if defined?(CodeToQuery::Context::Builder)
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Auto-bootstrap of context pack failed: #{e.message}")
    end
  end

  File.exist?(pack_path) ? JSON.parse(File.read(pack_path)) : {}
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to load context pack: #{e.message}")
  {}
end
97
+
98
# Builds the minimal "select everything" intent used when planning fails.
#
# @param allow_tables [Array, Object, nil] the first non-nil entry becomes
#   the table; 'main_table' is used when there is none
# @return [Hash] intent hash; 'limit' is present only when
#   `@config.default_limit` is set
def fallback_intent(allow_tables)
  table = Array(allow_tables).compact.first || 'main_table'

  {
    'type' => 'select',
    'table' => table,
    'columns' => ['*'],
    'filters' => [],
    'order' => [],
    'params' => {}
  }.tap do |intent|
    intent['limit'] = @config.default_limit if @config.default_limit
  end
end
110
+
111
# Appends retry feedback to the prompt.
#
# @param prompt [String] original user prompt
# @param feedback [#to_s, nil] hint text from a previous failed attempt
# @return [String, Object] the prompt unchanged when the feedback is blank,
#   otherwise the prompt followed by a "Constraints/feedback" paragraph
def build_prompt_with_feedback(prompt, feedback)
  if feedback.to_s.strip.empty?
    prompt
  else
    "#{prompt}\n\nConstraints/feedback: #{feedback}"
  end
end
116
+
117
# Produces a corrective hint to append to the prompt after a failed attempt.
#
# The feedback mode is compared by its string form so that both `:none` and
# `'none'` disable feedback, matching how #plan compares the mode with
# `.to_s`. A nil/false mode also disables feedback.
#
# @param error [Exception] the error raised by the failed attempt
# @return [String] '' when feedback is disabled, otherwise a hint chosen by
#   the error class (ArgumentError gets the schema/EXISTS specific hint)
def generate_feedback(error)
  mode = @config.planner_feedback_mode
  return '' if !mode || mode.to_s == 'none'

  case error
  when ArgumentError
    'Ensure all columns and tables exist in the provided schema and avoid unknown fields. For EXISTS, provide related_table and fk_column.'
  else
    'Avoid inventing tables/columns; map business terms to schema; use EXISTS for relationships; set column to "id" in EXISTS filters.'
  end
end
127
+
128
# Removes references to columns that the schema does not list for the
# intent's table (used by the 'schema_strict' feedback mode).
#
# - Filters with op exists/not_exists are always kept; other filters are
#   kept only when their 'column' is known.
# - 'order' entries are kept only when their 'column' is known.
# - 'columns' keeps '*' and known column names.
# - Malformed (non-Hash) filter/order entries are dropped.
#
# Column names are compared as strings, so schemas that use Symbol names or
# plain String/Symbol column entries are handled. The input intent is not
# mutated; a filtered shallow copy is returned.
#
# @param intent [Hash, Object] intent produced by the provider
# @param schema [Hash, Object] raw schema ({'tables'=>[...]}) or context
#   pack ({'schema'=>{'tables'=>[...]}})
# @return [Hash, Object] the filtered copy, or the original intent when it
#   is not a Hash, the table is not found in the schema, or an error occurs
def strip_unknown_columns(intent, schema)
  return intent unless intent.is_a?(Hash) && schema.is_a?(Hash)

  tables = Array(schema.dig('schema', 'tables') || schema['tables'])
  wanted = intent['table'].to_s
  table = tables.find { |t| t.is_a?(Hash) && (t['name'] || t[:name]).to_s == wanted }
  return intent unless table

  known = Array(table['columns'] || table[:columns]).map do |col|
    name = case col
           when Hash then col['name'] || col[:name]
           when String, Symbol then col
           end
    name&.to_s
  end.compact

  cleaned = intent.dup

  if intent['filters'].is_a?(Array)
    cleaned['filters'] = intent['filters'].select do |filter|
      next false unless filter.is_a?(Hash)

      %w[exists not_exists].include?(filter['op'].to_s) || known.include?(filter['column'].to_s)
    end
  end

  if intent['order'].is_a?(Array)
    cleaned['order'] = intent['order'].select { |o| o.is_a?(Hash) && known.include?(o['column'].to_s) }
  end

  if intent['columns'].is_a?(Array)
    cleaned['columns'] = intent['columns'].select { |c| c == '*' || known.include?(c.to_s) }
  end

  cleaned
rescue StandardError
  # Best effort: strictness must never break planning
  intent
end
159
+
160
# Fill missing intent params with simple values parsed from the prompt.
# Example: "student with id 17963" -> maps first integer to first missing *_id param.
#
# Two independent passes run over the intent's filters:
# 1. integers found in the prompt are assigned, in order, to *_id filters
#    that have no param yet (see #backfill_id_params);
# 2. prompt words matching a model enum label are assigned to '=' filters
#    on enum columns (see #backfill_enum_params).
#
# Digits are always parsed as base 10, so "010" is 10 and "08" is 8.
# The enum pass also runs when the prompt contains no numbers.
# Non-Hash filter entries are ignored.
#
# @param prompt [String, Object] user prompt; non-Strings disable backfill
# @param intent [Hash, Object] provider intent; non-Hashes are returned as-is
# @param schema [Hash, nil] schema/context pack passed to #extract_enums_index
# @return [Hash, Object] a copy of the intent with a merged 'params' hash,
#   or the original intent when backfill is not applicable or raises
def backfill_params_from_prompt(prompt, intent, schema)
  return intent unless prompt.is_a?(String)
  return intent unless intent.is_a?(Hash)

  params = (intent['params'] || {}).dup
  filters = Array(intent['filters']).select { |f| f.is_a?(Hash) }
  # Explicit base 10: Integer("010") would be octal 8 and Integer("08") raises
  numbers = prompt.scan(/\b\d+\b/).map { |digits| Integer(digits, 10) }

  backfill_id_params(params, filters, numbers)
  backfill_enum_params(params, filters, intent['table'], prompt, schema)

  intent.merge('params' => params)
rescue StandardError
  # Best effort: heuristics must never break planning
  intent
end

# Assigns prompt integers, in order, to *_id filters lacking a param.
# Main-table filters are considered for value operators only; for
# exists/not_exists filters their related_filters are considered instead.
# Mutates `params` and consumes `numbers`.
def backfill_id_params(params, filters, numbers)
  targets = filters.flat_map do |filter|
    op = filter['op'].to_s
    if %w[exists not_exists].include?(op)
      Array(filter['related_filters']).select { |rf| rf.is_a?(Hash) }
    elsif ['=', 'like', 'ilike', 'in', 'between'].include?(op)
      [filter]
    else
      []
    end
  end

  targets.each do |target|
    break if numbers.empty?
    next unless target['column'].to_s.end_with?('_id')

    key = (target['param'] || target['column']).to_s
    next if key.strip.empty? || params.key?(key)

    params[key] = numbers.shift
  end
end

# Assigns enum labels mentioned in the prompt to '=' filters on enum
# columns, first for the main table, then for related tables referenced by
# exists/not_exists filters. Mutates `params`.
def backfill_enum_params(params, filters, table_name, prompt, schema)
  enums_by_table = extract_enums_index(schema)
  return unless enums_by_table.is_a?(Hash)

  tokens = prompt.scan(/[a-z0-9_]+/i).map(&:downcase)

  if table_name
    main_enums = enums_by_table[table_name.to_s]
    filters.each { |filter| backfill_enum_label(params, filter, main_enums, tokens) }
  end

  filters.each do |filter|
    next unless %w[exists not_exists].include?(filter['op'].to_s)

    related_table = filter['related_table']
    next unless related_table

    related_enums = enums_by_table[related_table.to_s]
    Array(filter['related_filters']).each do |rf|
      backfill_enum_label(params, rf, related_enums, tokens) if rf.is_a?(Hash)
    end
  end
end

# Sets params[key] to the first prompt token that is a label of the enum
# defined for the filter's column; does nothing when the param already
# exists, the filter is not an '=' filter, or the column has no enum.
def backfill_enum_label(params, filter, enums, tokens)
  return unless enums.is_a?(Hash) && filter['op'].to_s == '='

  column = filter['column']
  return unless column

  key = (filter['param'] || column).to_s
  return if params.key?(key)

  mapping = enums[column.to_s]
  return unless mapping.is_a?(Hash) && mapping.any?

  label = tokens.find { |token| mapping.key?(token) }
  params[key] = label if label
end
258
+
259
# Indexes model enums from a context pack as
# { table_name => { column => { downcased_label => value } } }.
#
# Models are read from schema['models']['models']; entries that are not
# Hashes, have no table name, or whose enums are not Hashes are skipped.
# The returned Hash yields an empty Hash for unknown tables.
#
# @param schema [Hash, Object] context pack
# @return [Hash] the index; a plain {} when an error is raised while building it
def extract_enums_index(schema)
  index = Hash.new { |hash, table| hash[table] = {} }
  models = schema.is_a?(Hash) ? schema.dig('models', 'models') : nil
  return index unless models.is_a?(Hash)

  models.each_value do |meta|
    next unless meta.is_a?(Hash)

    table = meta['table_name'] || meta[:table_name]
    enums = meta['enums'] || meta[:enums] || {}
    next unless table && enums.is_a?(Hash)

    enums.each do |column, mapping|
      next unless mapping.is_a?(Hash)

      # Labels are matched against downcased prompt tokens, so normalize keys
      index[table.to_s][column.to_s] = mapping.each_with_object({}) do |(label, value), normalized|
        normalized[label.to_s.downcase] = value
      end
    end
  end

  index
rescue StandardError
  {}
end
288
+ end
289
+ end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require 'pundit'
5
+ rescue LoadError
6
+ end
7
+
8
+ module CodeToQuery
9
+ module Policies
10
+ class PunditAdapter
11
# Collects policy information for one table using Pundit.
#
# @param current_user [Object, nil] user passed to Pundit and to the
#   tenant-predicate inference
# @param table [String, Object] table name; used as-is in the result
# @param intent [Object, nil] accepted for interface compatibility, unused
# @return [Hash] {} when Pundit is not loaded; otherwise a Hash with
#   :enforced_predicates, :allowed_tables (the table is listed only when
#   Pundit.policy_scope! succeeds for the inferred model) and
#   :allowed_columns (set when the policy responds to permitted_columns
#   and returns a non-empty list)
def call(current_user, table:, intent: nil) # rubocop:disable Lint/UnusedMethodArgument
  return {} unless defined?(Pundit)

  predicates = inferred_tenant_predicates(current_user, table)
  tables = []
  columns = {}

  if (model = infer_model_for_table(table))
    begin
      Pundit.policy_scope!(current_user, model)
      tables << table
    rescue StandardError
      # Scope resolution failed: the table stays out of allowed_tables
    end

    policy = Pundit.policy(current_user, model)
    if policy.respond_to?(:permitted_columns)
      permitted = Array(policy.permitted_columns).map(&:to_s)
      columns[table] = permitted if permitted.any?
    end
  end

  { enforced_predicates: predicates, allowed_tables: tables, allowed_columns: columns }
end
37
+
38
+ private
39
+
40
# Finds the ActiveRecord model class backing a table by trying
# conventional class names derived from the table name.
#
# The table name is compared as a String, so Symbol table names resolve
# too. Constants that are not classes are skipped instead of raising from
# the `<` comparison. Duplicate candidate names are tried once.
#
# @param table_name [String, Symbol, nil]
# @return [Class, nil] the model whose `table_name` equals the given name,
#   or nil when ActiveRecord is not loaded, the name is nil, or no
#   candidate matches
def infer_model_for_table(table_name)
  return nil unless defined?(ActiveRecord::Base)
  return nil unless table_name

  wanted = table_name.to_s
  singular = wanted.singularize.camelize
  # Last candidate strips a trailing "s" the inflector may have left behind
  candidates = [singular, wanted.camelize, singular.sub(/s\z/, '')].uniq

  candidates.each do |class_name|
    model = class_name.constantize
    return model if model.is_a?(Class) && model < ActiveRecord::Base && model.table_name == wanted
  rescue NameError
    next
  end
  nil
end
58
+
59
# Derives tenant-scoping predicates from the current user.
#
# @param current_user [Object, nil]
# @param _table [Object] unused
# @return [Hash] { company_id: <value> } when the user responds to
#   company_id and it is truthy; {} otherwise
def inferred_tenant_predicates(current_user, _table)
  return {} unless current_user
  return {} unless current_user.respond_to?(:company_id)

  current_user.company_id ? { company_id: current_user.company_id } : {}
end
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,173 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodeToQuery
4
+ module Providers
5
+ class Base
6
+ attr_reader :metrics
7
+
8
# Sets up a provider with its configuration and an empty metrics Hash
# (exposed through the `metrics` reader).
#
# @param config [Object] configuration object, stored as-is
def initialize(config)
  @metrics = {}
  @config = config
end
12
+
13
# Abstract entry point: concrete providers turn a prompt into an intent.
#
# @param prompt [String]
# @param schema [Hash]
# @param allow_tables [Array]
# @raise [NotImplementedError] always, in this base implementation
def extract_intent(prompt:, schema:, allow_tables:)
  raise(NotImplementedError, 'Subclasses must implement #extract_intent')
end
16
+
17
+ protected
18
+
19
# Builds the context handed to an LLM provider: the tables it may use, a
# one-line summary per schema table, and the list of query constraints.
#
# `schema` may be a raw schema ({'tables'=>[...]}) or a full context pack
# ({'schema'=>{...}, 'models'=>{...}, 'glossary'=>{...}}). Each table line
# lists up to 10 columns, its *_id columns, and - when the context pack
# carries both model definitions and a scopes map - up to 4 scopes and up
# to 3 enums of the model mapped to that table.
#
# @param schema [Hash, Object] raw schema or context pack
# @param allow_tables [Array, Object, nil] explicit allowlist; when empty
#   or nil the schema's table names are used instead
# @return [Hash] with :available_tables (Array), :schema_info (String,
#   including the glossary section when a glossary exists) and
#   :constraints (result of #build_constraints_info)
def build_system_context(schema, allow_tables)
  raw_schema, model_defs, scopes_map, glossary = context_pack_parts(schema)
  tables = raw_schema['tables'] if raw_schema.is_a?(Hash)
  allowed = Array(allow_tables).compact

  available_tables = if allowed.any?
                       allowed
                     elsif tables
                       tables.map { |t| t['name'] || t[:name] }.compact
                     else
                       []
                     end

  schema_info = if tables
                  tables.map { |table| table_context_line(table, model_defs, scopes_map) }.join("\n")
                else
                  'No schema information available'
                end

  {
    available_tables: available_tables,
    schema_info: schema_info + glossary_context_section(glossary),
    constraints: build_constraints_info
  }
end

# Splits a context pack into [raw_schema, model_defs, scopes_map, glossary].
# A raw schema (no 'schema' Hash inside) is returned unchanged with no
# model information and an empty glossary.
def context_pack_parts(schema)
  return [schema, nil, nil, {}] unless schema.is_a?(Hash) && schema['schema'].is_a?(Hash)

  models = schema['models']
  model_defs, scopes_map = models.is_a?(Hash) ? [models['models'], models['scopes']] : [nil, nil]
  [schema['schema'], model_defs, scopes_map, schema['glossary'] || {}]
end

# One summary line for a table: "name: cols | fks: ... | scopes: ... | enums: ...".
def table_context_line(table, model_defs, scopes_map)
  table_name = table['name'] || table[:name]
  col_names = Array(table['columns'] || table[:columns]).map { |c| c['name'] || c[:name] }.compact
  fks = col_names.select { |c| c.to_s.end_with?('_id') }

  line = "#{table_name}: #{col_names.take(10).join(', ')}"
  line += " | fks: #{fks.join(', ')}" if fks.any?
  line + model_context_suffix(table_name, model_defs, scopes_map)
end

# Scope and enum summary for the model mapped to `table_name`; '' when the
# pack has no model definitions or scopes map, or no model matches.
def model_context_suffix(table_name, model_defs, scopes_map)
  return '' unless model_defs && scopes_map

  model_name, _definition = model_defs.find do |_name, definition|
    definition.is_a?(Hash) && (definition['table_name'] || definition[:table_name]) == table_name
  end
  return '' unless model_name

  scopes = scopes_map[model_name] || scopes_map[model_name.to_sym]
  enums = model_defs.dig(model_name, 'enums') || model_defs.dig(model_name.to_sym, :enums) || {}
  scopes_context_suffix(scopes) + enums_context_suffix(enums)
end

# " | scopes: a: <where>; b" for the first 4 scopes, or '' when there are none.
def scopes_context_suffix(scopes)
  return '' unless scopes.is_a?(Hash) && scopes.any?

  pairs = scopes.first(4).map do |scope_name, meta|
    where = meta.is_a?(Hash) ? (meta['where'] || meta[:where]) : nil
    where ? "#{scope_name}: #{where}" : scope_name.to_s
  end
  " | scopes: #{pairs.join('; ')}"
end

# " | enums: col(label=value, ...)" for the first 3 enums (2 sample labels
# each), or '' when there are none.
def enums_context_suffix(enums)
  return '' unless enums.is_a?(Hash) && enums.any?

  pairs = enums.first(3).map do |column, mapping|
    sample = mapping.is_a?(Hash) ? mapping.first(2).map { |k, v| "#{k}=#{v}" }.join(', ') : ''
    sample.empty? ? column.to_s : "#{column}(#{sample})"
  end
  " | enums: #{pairs.join('; ')}"
end

# Glossary block appended to the schema summary; '' when the glossary is empty.
def glossary_context_section(glossary)
  return '' unless glossary.respond_to?(:any?) && glossary.any?

  lines = glossary.map { |term, synonyms| "#{term}: #{Array(synonyms).join(', ')}" }
  "\nBusiness Glossary (user terms -> database concepts):\n#{lines.join("\n")}"
end
100
+
101
# Assembles the full list of query-generation constraints: the fixed base
# rules followed by the limit rules and the aggregation rules.
#
# @return [Array<String>]
def build_constraints_info
  rules = [
    'Only generate SELECT queries',
    'All values must be parameterized',
    'Use standard SQL operators: =, !=, <>, >, <, >=, <=, between, in, like, ilike',
    'Prefer EXISTS/NOT EXISTS when expressing presence/absence of related rows',
    'DISTINCT is allowed for unique result sets',
    'DISTINCT ON (columns) is supported for PostgreSQL',
    "For 'top N' queries, use ORDER BY with LIMIT"
  ]

  rules.concat(build_limit_constraints, build_aggregation_constraints)
end
117
+
118
# Describes the automatic LIMIT rules implied by the configuration.
# COUNT and aggregation rules are always present (with a "no automatic
# LIMIT" wording when unset); the default, DISTINCT and EXISTS rules appear
# only when the corresponding setting is set.
#
# @return [Array<String>]
def build_limit_constraints
  [
    (@config.default_limit ? "Default LIMIT: #{@config.default_limit} for SELECT queries" : nil),
    if @config.count_limit
      "COUNT queries limited to #{@config.count_limit} rows"
    else
      'COUNT queries have no automatic LIMIT'
    end,
    if @config.aggregation_limit
      "Aggregation queries limited to #{@config.aggregation_limit} rows"
    else
      'Aggregation queries (SUM, AVG, MAX, MIN) have no automatic LIMIT'
    end,
    (@config.distinct_limit ? "DISTINCT queries limited to #{@config.distinct_limit} rows" : nil),
    (@config.exists_limit ? "EXISTS checks automatically use LIMIT #{@config.exists_limit}" : nil)
  ].compact
end
147
+
148
# Lists the aggregation capabilities advertised to the provider.
#
# @return [Array<String>] a new array of four fixed statements
def build_aggregation_constraints
  supported = []
  supported << 'COUNT(*) and COUNT(column) are supported'
  supported << 'SUM, AVG, MAX, MIN aggregations are supported'
  supported << 'GROUP BY is supported for aggregations'
  supported << 'Multiple aggregations can be combined in a single query'
  supported
end
156
+
157
# Fills in default intent fields and enforces the table allowlist.
#
# Defaults are written into the given Hash only for keys that are missing
# or nil; 'limit' is defaulted only when `@config.default_limit` is set.
# The allowlist is coerced with Array() and compacted, so nil or a single
# table name is accepted, and names are compared as strings so Symbol and
# String spellings of the same table match.
#
# @param intent [Hash] intent to complete (mutated and returned)
# @param allow_tables [Array, Object, nil] allowed table names; an empty
#   list disables the check
# @return [Hash] the same intent Hash
# @raise [ArgumentError] when an allowlist is given and intent['table'] is
#   not in it
def validate_and_enhance_intent(intent, allow_tables)
  intent['type'] ||= 'select'
  intent['columns'] ||= ['*']
  intent['filters'] ||= []
  intent['order'] ||= []
  intent['limit'] ||= @config.default_limit if @config.default_limit
  intent['params'] ||= {}

  allowed = Array(allow_tables).compact.map(&:to_s)
  if allowed.any? && !allowed.include?(intent['table'].to_s)
    raise ArgumentError, "Table '#{intent['table']}' not in allowlist: #{allowed.join(', ')}"
  end

  intent
end
171
+ end
172
+ end
173
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module CodeToQuery
4
+ module Providers
5
+ class Local < Base
6
# Produces a deterministic "select everything" intent without calling an
# LLM, and records lightweight metrics.
#
# The table is the first allowed table, else the first schema table, else
# 'main_table'. The intent is validated against the normalized allowlist
# (the same list used for table selection), so a nil or scalar
# `allow_tables` no longer reaches the validator in raw form.
#
# @param prompt [#to_s] user prompt (stripped and stored in @prompt)
# @param schema [Hash, nil] schema or context pack ({} when nil)
# @param allow_tables [Array, Object, nil] coerced with Array() and compacted
# @return [Hash] the validated intent
def extract_intent(prompt:, schema:, allow_tables:)
  @prompt = prompt.to_s.strip
  @schema = schema || {}
  @allow_tables = Array(allow_tables).compact
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  intent = {
    'type' => 'select',
    'table' => select_table || 'main_table', # Back-compat default expected by specs
    'columns' => ['*'],
    'filters' => [],
    'order' => [],
    'params' => {}
  }

  # Only set limit when configured (avoid nil which fails validation)
  intent['limit'] = @config.default_limit if @config.default_limit

  result = validate_and_enhance_intent(intent, @allow_tables)

  # Lightweight metrics: elapsed and estimated tokens from prompt + schema table names
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  estimated = estimate_tokens(build_prompt_blob(@prompt, @schema))
  @metrics[:prompt_tokens] = estimated
  @metrics[:completion_tokens] = 0
  @metrics[:total_tokens] = estimated
  @metrics[:elapsed_s] = elapsed

  result
end
40
+
41
+ private
42
+
43
# Builds the text used for token estimation: the prompt, a newline, then
# the comma-separated schema table names.
#
# @param prompt [#to_s]
# @param schema [Hash] raw schema or context pack
# @return [String]
def build_prompt_blob(prompt, schema)
  table_list = schema['tables'] || schema.dig('schema', 'tables') || []
  names = Array(table_list).map { |t| t['name'] || t[:name] }.compact

  "#{prompt}\n#{names.join(',')}"
end
48
+
49
# Rough token estimate: one token per four characters, rounded up.
#
# @param text [#to_s]
# @return [Integer]
def estimate_tokens(text)
  text.to_s.length.fdiv(4).ceil
end
52
+
53
# Picks the table for the intent: the first allowed table when an
# allowlist exists, otherwise the name of the first schema table.
#
# @return [Object, nil] table name, or nil when neither source has one
def select_table
  return @allow_tables.first if @allow_tables.any?

  first_table = extract_schema_tables.first
  first_table ? first_table[:name] : nil
end
61
+
62
# Normalizes the tables found in @schema into Hashes with :name and
# :columns. Tables are read from @schema['tables'] when that is an Array,
# otherwise from @schema['schema']['tables']; non-Hash entries are dropped.
#
# @return [Array<Hash>] [] when @schema is not a Hash or has no table list
def extract_schema_tables
  return [] unless @schema.is_a?(Hash)

  raw_tables = @schema['tables']
  unless raw_tables.is_a?(Array)
    nested = @schema['schema']
    raw_tables = nested.is_a?(Hash) && nested['tables'].is_a?(Array) ? nested['tables'] : []
  end

  raw_tables.select { |table| table.is_a?(Hash) }.map do |table|
    {
      name: table['name'] || table[:name],
      columns: Array(table['columns'] || table[:columns])
    }
  end
end
82
+ end
83
+ end
84
+ end