code_to_query 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +23 -0
- data/README.md +167 -0
- data/lib/code_to_query/compiler.rb +674 -0
- data/lib/code_to_query/configuration.rb +92 -0
- data/lib/code_to_query/context/builder.rb +1087 -0
- data/lib/code_to_query/context/pack.rb +36 -0
- data/lib/code_to_query/errors.rb +5 -0
- data/lib/code_to_query/guardrails/explain_gate.rb +229 -0
- data/lib/code_to_query/guardrails/sql_linter.rb +335 -0
- data/lib/code_to_query/llm_client.rb +46 -0
- data/lib/code_to_query/performance/cache.rb +250 -0
- data/lib/code_to_query/performance/optimizer.rb +396 -0
- data/lib/code_to_query/planner.rb +289 -0
- data/lib/code_to_query/policies/pundit_adapter.rb +71 -0
- data/lib/code_to_query/providers/base.rb +173 -0
- data/lib/code_to_query/providers/local.rb +84 -0
- data/lib/code_to_query/providers/openai.rb +581 -0
- data/lib/code_to_query/query.rb +385 -0
- data/lib/code_to_query/railtie.rb +16 -0
- data/lib/code_to_query/runner.rb +188 -0
- data/lib/code_to_query/validator.rb +203 -0
- data/lib/code_to_query/version.rb +6 -0
- data/lib/code_to_query.rb +90 -0
- data/tasks/code_to_query.rake +326 -0
- metadata +225 -0
@@ -0,0 +1,289 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CodeToQuery
|
4
|
+
class Planner
|
5
|
+
# Keeps a reference to the configuration object; the other methods of this
# class read their settings (provider, logger, limits, ...) from it.
#
# @param config [Object] configuration consulted during planning
def initialize(config)
  @config = config
end
|
8
|
+
|
9
|
+
# Turns a natural-language prompt into a query intent hash.
#
# The provider is asked for an intent up to `planner_max_attempts` times.
# After a failed attempt the error is logged and a hint derived from it is
# appended to the prompt for the next try. When every attempt fails, or the
# setup itself raises, the problem is logged and a fallback intent is returned.
#
# @param prompt [String] the user's request
# @param schema [Hash, nil] schema or context pack; loaded via load_context_pack when nil
# @param allow_tables [Array, Object, nil] table allowlist (wrapped with Array() and compacted)
# @return [Hash] the planned intent, or the fallback intent
def plan(prompt:, schema:, allow_tables:)
  schema ||= load_context_pack
  allow_tables = Array(allow_tables).compact

  provider = build_provider
  # A missing, zero or negative setting must still yield one real attempt;
  # otherwise the provider would never be consulted and every request would
  # silently degrade to the fallback intent.
  max_attempts = [@config.planner_max_attempts.to_i, 1].max
  last_error = nil
  feedback = nil

  1.upto(max_attempts) do |attempt|
    intent = provider.extract_intent(
      prompt: build_prompt_with_feedback(prompt, feedback),
      schema: schema,
      allow_tables: allow_tables
    )

    # Optional schema strictness pass: drop filters referencing unknown columns
    intent = strip_unknown_columns(intent, schema) if @config.planner_feedback_mode.to_s == 'schema_strict'

    # Expose provider metrics if available
    if provider.respond_to?(:metrics) && provider.metrics.is_a?(Hash)
      intent = intent.merge('_metrics' => provider.metrics)
    end

    # Heuristic backfill of missing params from the prompt (IDs and enum-like labels)
    return backfill_params_from_prompt(prompt, intent, schema)
  rescue StandardError => e
    last_error = e
    feedback = generate_feedback(e)
    @config.logger.warn("[code_to_query] Planning attempt #{attempt} failed: #{e.message}")
  end

  @config.logger.warn("[code_to_query] Query planning failed after #{max_attempts} attempts: #{last_error&.message}")
  fallback_intent(allow_tables)
rescue StandardError => e
  @config.logger.warn("[code_to_query] Query planning failed: #{e.message}")
  fallback_intent(allow_tables)
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Instantiates the intent provider named by `@config.provider`.
# :openai and :local map to their provider classes; any other value defers
# to detect_best_provider.
def build_provider
  choice = @config.provider
  return Providers::OpenAI.new(@config) if choice == :openai
  return Providers::Local.new(@config) if choice == :local

  detect_best_provider
end
|
68
|
+
|
69
|
+
# Chooses a provider when none is configured explicitly: the OpenAI provider
# when an API key is present and LLM stubbing is off, the local one otherwise.
def detect_best_provider
  use_openai = @config.openai_api_key.present? && !@config.stub_llm
  provider_class = use_openai ? Providers::OpenAI : Providers::Local
  provider_class.new(@config)
end
|
76
|
+
|
77
|
+
# Reads the JSON context pack found at `@config.context_pack_path`.
#
# When the file is missing and CodeToQuery::Context::Builder is loaded, a
# bootstrap is attempted first; a failing bootstrap is only logged.
#
# @return [Hash] the parsed pack, or {} when the file is still absent or
#   reading/parsing raises (the error is logged)
def load_context_pack
  pack_file = @config.context_pack_path

  if !File.exist?(pack_file) && defined?(CodeToQuery::Context::Builder)
    begin
      # Attempt to bootstrap a context pack automatically if missing
      CodeToQuery::Context::Builder.new(@config).bootstrap!
    rescue StandardError => e
      @config.logger.warn("[code_to_query] Auto-bootstrap of context pack failed: #{e.message}")
    end
  end

  File.exist?(pack_file) ? JSON.parse(File.read(pack_file)) : {}
rescue StandardError => e
  @config.logger.warn("[code_to_query] Failed to load context pack: #{e.message}")
  {}
end
|
97
|
+
|
98
|
+
# Builds the generic intent returned when planning fails: select every column
# of the first allowed table ('main_table' when there is none), with no
# filters, ordering or params. A 'limit' is added only when a default limit
# is configured.
#
# @param allow_tables [Array, Object, nil] table allowlist
# @return [Hash] fallback intent
def fallback_intent(allow_tables)
  first_allowed = Array(allow_tables).compact.first

  {
    'type' => 'select',
    'table' => first_allowed || 'main_table',
    'columns' => ['*'],
    'filters' => [],
    'order' => [],
    'params' => {}
  }.tap do |intent|
    limit = @config.default_limit
    intent['limit'] = limit if limit
  end
end
|
110
|
+
|
111
|
+
# Appends retry feedback to the prompt. Blank or nil feedback leaves the
# prompt untouched.
#
# @param prompt [String] original user prompt
# @param feedback [String, nil] hint from the previous failed attempt
# @return [String] prompt, optionally followed by a constraints section
def build_prompt_with_feedback(prompt, feedback)
  if feedback.to_s.strip.empty?
    prompt
  else
    "#{prompt}\n\nConstraints/feedback: #{feedback}"
  end
end
|
116
|
+
|
117
|
+
# Produces the hint appended to the prompt after a failed planning attempt.
#
# Feedback is disabled (empty string) when `planner_feedback_mode` is unset
# or "none". The mode is compared via #to_s so that a string setting behaves
# like the symbol, matching how `plan` checks for 'schema_strict'.
#
# @param error [Exception] the error raised by the failed attempt
# @return [String] guidance text, or '' when feedback is disabled
def generate_feedback(error)
  mode = @config.planner_feedback_mode
  return '' unless mode && mode.to_s != 'none'

  if error.is_a?(ArgumentError)
    'Ensure all columns and tables exist in the provided schema and avoid unknown fields. For EXISTS, provide related_table and fk_column.'
  else
    'Avoid inventing tables/columns; map business terms to schema; use EXISTS for relationships; set column to "id" in EXISTS filters.'
  end
end
|
127
|
+
|
128
|
+
# Removes references to columns that the schema does not list for the
# intent's table. The intent hash is modified in place and returned.
#
# * filters: kept when the op is exists/not_exists or the column is known
# * order:   kept when the column is known
# * columns: '*' and known columns are kept
#
# Column names from the schema are normalised to strings, just like table
# names are, so a schema using symbol names is not treated as "all unknown".
# Best-effort: if the table is not in the schema, or anything raises, the
# intent is returned as it is at that point.
#
# @param intent [Hash] provider intent (non-hashes are returned untouched)
# @param schema [Hash] raw schema ({'tables' => [...]}) or context pack ({'schema' => {'tables' => [...]}})
# @return [Hash] the intent
def strip_unknown_columns(intent, schema)
  return intent unless intent.is_a?(Hash)

  tables = Array(schema.dig('schema', 'tables') || schema['tables'] || [])
  wanted = intent['table'].to_s
  table = tables.find { |t| (t['name'] || t[:name]).to_s == wanted }
  return intent unless table

  known = Array(table['columns'] || table[:columns])
          .map { |c| c['name'] || c[:name] }
          .compact
          .map(&:to_s)

  if intent['filters'].is_a?(Array)
    intent['filters'] = intent['filters'].select do |filter|
      # EXISTS filters are always kept by this pass
      %w[exists not_exists].include?(filter['op'].to_s) || known.include?(filter['column'].to_s)
    end
  end

  if intent['order'].is_a?(Array)
    intent['order'] = intent['order'].select { |entry| known.include?(entry['column'].to_s) }
  end

  if intent['columns'].is_a?(Array)
    intent['columns'] = intent['columns'].select { |col| col == '*' || known.include?(col.to_s) }
  end

  intent
rescue StandardError
  intent
end
|
159
|
+
|
160
|
+
# Fills intent params that are still missing, using simple values parsed from
# the prompt. Best-effort: any StandardError returns the intent unchanged.
#
# Two heuristics run independently of each other:
# 1. Integers in the prompt are handed out, in order of appearance, to
#    filters on *_id columns that have no param yet.
#    Example: "student with id 17963" -> first missing *_id param.
# 2. Prompt words that match an enum label (from extract_enums_index) are
#    assigned to "=" filters on that enum column, for the main table and
#    for related tables inside EXISTS filters.
#
# Existing params are never overwritten and the input intent is not mutated.
#
# @param prompt [String] original user prompt (non-strings return the intent as is)
# @param intent [Hash] provider intent (non-hashes are returned as is)
# @param schema [Hash] schema/context pack passed on to extract_enums_index
# @return [Hash] intent merged with the completed 'params' hash
def backfill_params_from_prompt(prompt, intent, schema)
  return intent unless prompt.is_a?(String)
  return intent unless intent.is_a?(Hash)

  params = (intent['params'] || {}).dup
  filters = Array(intent['filters']).select { |f| f.is_a?(Hash) }
  exists_ops = %w[exists not_exists]

  # Base 10 is explicit: without it Integer() reads a leading zero as octal,
  # so "010" became 8 and "0917" raised, discarding every number found.
  remaining = prompt.scan(/\b\d+\b/).map { |digits| Integer(digits, 10) }

  # Gives the next unused number to a filter on a *_id column lacking a param.
  assign_number = lambda do |filter|
    return unless filter.is_a?(Hash) && filter['column'].to_s.end_with?('_id')

    key = (filter['param'] || filter['column']).to_s
    return if key.strip.empty? || params.key?(key)

    value = remaining.shift
    params[key] = value if value
  end

  # Main table filters first, then related filters of EXISTS, in filter order
  filters.each do |filter|
    op = filter['op'].to_s
    if exists_ops.include?(op)
      Array(filter['related_filters']).each { |related| assign_number.call(related) }
    elsif %w[= like ilike in between].include?(op)
      assign_number.call(filter)
    end
  end

  # Enum-like backfill using model enums from schema (if available).
  # Runs even when the prompt has no numbers at all.
  tokens = prompt.scan(/[a-z0-9_]+/i).map(&:downcase)
  enums_by_table = extract_enums_index(schema)

  # Sets the param of an "=" filter to the first prompt word that is a label
  # of the enum defined for that table/column.
  assign_label = lambda do |filter, table|
    return unless filter.is_a?(Hash) && filter['op'].to_s == '='

    column = filter['column']
    return unless column

    key = (filter['param'] || column).to_s
    return if params.key?(key)

    mapping = enums_by_table[table][column]
    return unless mapping.is_a?(Hash) && mapping.any?

    label = tokens.find { |token| mapping.key?(token) }
    params[key] = label if label
  end

  # Main table columns
  table_name = intent['table']
  if table_name && enums_by_table[table_name]
    filters.each { |filter| assign_label.call(filter, table_name) }
  end

  # Related table columns in EXISTS
  filters.each do |filter|
    next unless exists_ops.include?(filter['op'].to_s)

    related_table = filter['related_table']
    next unless related_table && enums_by_table[related_table]

    Array(filter['related_filters']).each { |related| assign_label.call(related, related_table) }
  end

  intent.merge('params' => params)
rescue StandardError
  intent
end
|
258
|
+
|
259
|
+
# Builds a lookup of enum label mappings from the models section of a
# context pack (schema['models']['models']).
#
# @param schema [Hash] context pack
# @return [Hash] { table_name => { column => { downcased_label => value } } };
#   looking up an unknown table yields (and stores) an empty hash. A plain {}
#   is returned when anything raises.
def extract_enums_index(schema)
  index = Hash.new { |hash, table| hash[table] = {} }
  models = schema.is_a?(Hash) ? schema.dig('models', 'models') : nil
  return index unless models.is_a?(Hash)

  models.each_value do |meta|
    next unless meta.is_a?(Hash)

    table = meta['table_name'] || meta[:table_name]
    enums = meta['enums'] || meta[:enums] || {}
    next unless table && enums.is_a?(Hash)

    enums.each do |column, mapping|
      next unless mapping.is_a?(Hash)

      # Normalize mapping keys to downcased strings
      index[table.to_s][column.to_s] =
        mapping.each_with_object({}) { |(label, value), normalized| normalized[label.to_s.downcase] = value }
    end
  end

  index
rescue StandardError
  {}
end
|
288
|
+
end
|
289
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
begin
|
4
|
+
require 'pundit'
|
5
|
+
rescue LoadError
|
6
|
+
end
|
7
|
+
|
8
|
+
module CodeToQuery
|
9
|
+
module Policies
|
10
|
+
class PunditAdapter
|
11
|
+
# Derives access information for a table from Pundit.
#
# @param current_user [Object, nil] user handed to Pundit
# @param table [String, Symbol] table the query targets
# @param intent [Hash, nil] accepted but not used here
# @return [Hash] {} when Pundit is not loaded; otherwise a hash with
#   :enforced_predicates (from inferred_tenant_predicates),
#   :allowed_tables (contains the table when a policy scope resolves) and
#   :allowed_columns (table => column names, when the policy responds to
#   permitted_columns and returns any)
def call(current_user, table:, intent: nil) # rubocop:disable Lint/UnusedMethodArgument
  return {} unless defined?(Pundit)

  predicates = inferred_tenant_predicates(current_user, table)
  model = infer_model_for_table(table)
  tables = []
  columns = {}

  if model
    begin
      Pundit.policy_scope!(current_user, model)
      tables << table
    rescue StandardError
      # A scope lookup that raises simply leaves the table out of allowed_tables.
    end

    policy = Pundit.policy(current_user, model)
    if policy.respond_to?(:permitted_columns)
      permitted = Array(policy.permitted_columns).map(&:to_s)
      columns[table] = permitted if permitted.any?
    end
  end

  { enforced_predicates: predicates, allowed_tables: tables, allowed_columns: columns }
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Finds the ActiveRecord model class backing a table by trying class names
# derived from the table name.
#
# The table name is compared as a string, so a symbol argument matches a
# model whose table_name is a string. Constants that are not classes are
# skipped instead of being compared against ActiveRecord::Base, which would
# raise ArgumentError for e.g. a String constant.
#
# @param table_name [String, Symbol, nil]
# @return [Class, nil] the model class, or nil when ActiveRecord is not
#   loaded, the name is nil, or no candidate matches
def infer_model_for_table(table_name)
  return nil unless defined?(ActiveRecord::Base)
  return nil unless table_name

  wanted = table_name.to_s
  singular = wanted.singularize.camelize
  candidates = [singular, wanted.camelize, singular.gsub(/s$/, '')].uniq

  candidates.each do |class_name|
    klass = class_name.constantize
    return klass if klass.is_a?(Class) && klass < ActiveRecord::Base && klass.table_name.to_s == wanted
  rescue NameError
    # No such constant (or the candidate lacks table_name): try the next name
    next
  end

  nil
end
|
58
|
+
|
59
|
+
# Derives a tenant predicate from the user: when the user responds to
# company_id and it is set, queries are constrained to that company.
#
# @param current_user [Object, nil]
# @param _table [Object] unused
# @return [Hash] { company_id: value } or {}
def inferred_tenant_predicates(current_user, _table)
  return {} unless current_user && current_user.respond_to?(:company_id)

  company = current_user.company_id
  company ? { company_id: company } : {}
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CodeToQuery
|
4
|
+
module Providers
|
5
|
+
class Base
|
6
|
+
attr_reader :metrics
|
7
|
+
|
8
|
+
# Stores the configuration and starts with an empty metrics hash, which is
# exposed through the `metrics` reader.
#
# @param config [Object] configuration read by the constraint builders
def initialize(config)
  @metrics = {}
  @config = config
end
|
12
|
+
|
13
|
+
# Abstract hook: concrete providers turn a prompt into an intent hash.
#
# @param prompt [String]
# @param schema [Hash]
# @param allow_tables [Array]
# @raise [NotImplementedError] always, in this base class
def extract_intent(prompt:, schema:, allow_tables:)
  raise(NotImplementedError, 'Subclasses must implement #extract_intent')
end
|
16
|
+
|
17
|
+
protected
|
18
|
+
|
19
|
+
# rubocop:disable Metrics/BlockNesting
|
20
|
+
# Assembles the context a provider needs to describe the database to a model.
#
# Accepts either a raw schema hash ({'tables' => [...]}) or a full context
# pack ({'schema' => {...}, 'models' => {...}, 'glossary' => {...}}).
#
# @param schema [Hash, nil] raw schema or context pack
# @param allow_tables [Array, Object, nil] table allowlist; nil or a single
#   value is tolerated (wrapped with Array() and compacted)
# @return [Hash] :available_tables (the allowlist, else all schema table
#   names), :schema_info (one line per table plus an optional glossary
#   section) and :constraints (from build_constraints_info)
def build_system_context(schema, allow_tables)
  raw_schema = schema
  model_defs = nil
  scopes_map = nil
  glossary = {}
  if schema.is_a?(Hash) && schema['schema'].is_a?(Hash)
    raw_schema = schema['schema']
    if schema['models'].is_a?(Hash)
      model_defs = schema['models']['models']
      scopes_map = schema['models']['scopes']
    end
    glossary = schema['glossary'] || {}
  end

  tables = raw_schema.is_a?(Hash) ? raw_schema['tables'] : nil
  allowed = Array(allow_tables).compact

  available_tables = if allowed.any?
                       allowed
                     elsif tables
                       tables.map { |t| t['name'] || t[:name] }.compact
                     else
                       []
                     end

  schema_info = if tables
                  tables.map { |table| context_table_line(table, model_defs, scopes_map) }.join("\n")
                else
                  'No schema information available'
                end

  # Include glossary information for better business understanding
  glossary_info = if glossary.any?
                    "\nBusiness Glossary (user terms -> database concepts):\n" \
                      "#{glossary.map { |term, synonyms| "#{term}: #{Array(synonyms).join(', ')}" }.join("\n")}"
                  else
                    ''
                  end

  {
    available_tables: available_tables,
    schema_info: schema_info + glossary_info,
    constraints: build_constraints_info
  }
end

# One summary line for a table: its first ten column names, the columns
# ending in _id (listed as fks) and any model metadata.
def context_table_line(table, model_defs, scopes_map)
  table_name = table['name'] || table[:name]
  col_names = Array(table['columns'] || table[:columns]).map { |c| c['name'] || c[:name] }.compact
  fks = col_names.select { |c| c.end_with?('_id') }
  fk_summary = fks.any? ? " | fks: #{fks.join(', ')}" : ''

  "#{table_name}: #{col_names.take(10).join(', ')}#{fk_summary}" \
    "#{context_model_summary(table_name, model_defs, scopes_map)}"
end

# Scope (up to 4) and enum (up to 3, two sample labels each) summary for the
# model mapped to the table. Empty unless both model definitions and a scopes
# map are present, so enums are only listed when a scopes map exists.
def context_model_summary(table_name, model_defs, scopes_map)
  return '' unless model_defs && scopes_map

  model_name, = model_defs.find { |_name, md| (md['table_name'] || md[:table_name]) == table_name }
  return '' unless model_name

  summary = ''
  scopes = scopes_map[model_name] || scopes_map[model_name.to_sym]
  if scopes.is_a?(Hash) && scopes.any?
    # Include compact where summaries when available
    pairs = scopes.to_a.take(4).map do |(scope_name, meta)|
      where = meta['where'] || meta[:where]
      where ? "#{scope_name}: #{where}" : scope_name.to_s
    end
    summary = " | scopes: #{pairs.join('; ')}"
  end

  enums = model_defs.dig(model_name, 'enums') || model_defs.dig(model_name.to_sym, :enums) || {}
  if enums.is_a?(Hash) && enums.any?
    enum_pairs = enums.to_a.take(3).map do |(col, mapping)|
      sample = mapping.is_a?(Hash) ? mapping.to_a.take(2).map { |k, v| "#{k}=#{v}" }.join(', ') : ''
      sample.empty? ? col.to_s : "#{col}(#{sample})"
    end
    summary += " | enums: #{enum_pairs.join('; ')}"
  end

  summary
end
|
99
|
+
# rubocop:enable Metrics/BlockNesting
|
100
|
+
|
101
|
+
# Lists the rules given to the model: fixed SQL rules first, then the
# limit rules, then the aggregation rules.
#
# @return [Array<String>]
def build_constraints_info
  [
    'Only generate SELECT queries',
    'All values must be parameterized',
    'Use standard SQL operators: =, !=, <>, >, <, >=, <=, between, in, like, ilike',
    'Prefer EXISTS/NOT EXISTS when expressing presence/absence of related rows',
    'DISTINCT is allowed for unique result sets',
    'DISTINCT ON (columns) is supported for PostgreSQL',
    "For 'top N' queries, use ORDER BY with LIMIT"
  ].concat(build_limit_constraints, build_aggregation_constraints)
end
|
117
|
+
|
118
|
+
# Describes the configured row limits. COUNT and aggregation always get a
# line (limited or not); the default, DISTINCT and EXISTS lines appear only
# when the matching limit is configured.
#
# @return [Array<String>]
def build_limit_constraints
  default_limit = @config.default_limit
  count_limit = @config.count_limit
  aggregation_limit = @config.aggregation_limit
  distinct_limit = @config.distinct_limit
  exists_limit = @config.exists_limit

  [
    default_limit ? "Default LIMIT: #{default_limit} for SELECT queries" : nil,
    count_limit ? "COUNT queries limited to #{count_limit} rows" : 'COUNT queries have no automatic LIMIT',
    if aggregation_limit
      "Aggregation queries limited to #{aggregation_limit} rows"
    else
      'Aggregation queries (SUM, AVG, MAX, MIN) have no automatic LIMIT'
    end,
    distinct_limit ? "DISTINCT queries limited to #{distinct_limit} rows" : nil,
    exists_limit ? "EXISTS checks automatically use LIMIT #{exists_limit}" : nil
  ].compact
end
|
147
|
+
|
148
|
+
# Lists the aggregation features the model may use.
#
# @return [Array<String>]
def build_aggregation_constraints
  supported = []
  supported << 'COUNT(*) and COUNT(column) are supported'
  supported << 'SUM, AVG, MAX, MIN aggregations are supported'
  supported << 'GROUP BY is supported for aggregations'
  supported << 'Multiple aggregations can be combined in a single query'
  supported
end
|
156
|
+
|
157
|
+
# Fills in default intent fields (in place) and enforces the table allowlist.
#
# The allowlist may be nil, a single value or an array. Table names are
# compared as strings, so an allowlist of symbols accepts the equivalent
# string table name. An empty allowlist disables the check.
#
# @param intent [Hash] intent to complete; modified in place
# @param allow_tables [Array, Object, nil] table allowlist
# @return [Hash] the same intent
# @raise [ArgumentError] when the allowlist is non-empty and does not contain intent['table']
def validate_and_enhance_intent(intent, allow_tables)
  defaults = { 'type' => 'select', 'columns' => ['*'], 'filters' => [], 'order' => [] }
  # Only default the limit when one is configured, so no nil limit is stored
  defaults['limit'] = @config.default_limit if @config.default_limit
  defaults['params'] = {}
  defaults.each { |key, value| intent[key] ||= value }

  allowed = Array(allow_tables).compact
  if allowed.any? && !allowed.map(&:to_s).include?(intent['table'].to_s)
    raise ArgumentError, "Table '#{intent['table']}' not in allowlist: #{allowed.join(', ')}"
  end

  intent
end
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CodeToQuery
|
4
|
+
module Providers
|
5
|
+
class Local < Base
|
6
|
+
# Builds an intent without calling an LLM: select every column of the first
# allowed table (else the first schema table, else 'main_table').
#
# Also records lightweight metrics: elapsed seconds and a token estimate
# based on the prompt plus the schema's table names.
#
# @param prompt [String, nil]
# @param schema [Hash, nil]
# @param allow_tables [Array, Object, nil] table allowlist; nil or a single value is accepted
# @return [Hash] the validated intent
# @raise [ArgumentError] from validate_and_enhance_intent when the table is not allowed
def extract_intent(prompt:, schema:, allow_tables:)
  @prompt = prompt.to_s.strip
  @schema = schema || {}
  @allow_tables = Array(allow_tables).compact
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  intent = {
    'type' => 'select',
    'table' => select_table || 'main_table', # Back-compat default expected by specs
    'columns' => ['*'],
    'filters' => [],
    'order' => [],
    'params' => {}
  }

  # Only set limit when configured (avoid nil which fails validation)
  intent['limit'] = @config.default_limit if @config.default_limit

  # Validate against the normalised allowlist: the raw argument may be nil
  # or a single table name, neither of which supports the array checks.
  result = validate_and_enhance_intent(intent, @allow_tables)

  # Lightweight metrics: elapsed and estimated tokens from prompt + schema table names
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
  estimate = estimate_tokens(build_prompt_blob(@prompt, @schema))
  @metrics[:prompt_tokens] = estimate
  @metrics[:completion_tokens] = 0
  @metrics[:total_tokens] = estimate
  @metrics[:elapsed_s] = elapsed

  result
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
# Text used for the token estimate: the prompt, a newline, then the schema's
# table names joined by commas.
#
# @param prompt [String]
# @param schema [Hash] raw schema or context pack
# @return [String]
def build_prompt_blob(prompt, schema)
  table_list = schema['tables'] || schema.dig('schema', 'tables') || []
  names = Array(table_list).map { |t| t['name'] || t[:name] }.compact

  "#{prompt}\n#{names.join(',')}"
end
|
48
|
+
|
49
|
+
# Rough token estimate: one token per four characters, rounded up.
#
# @param text [#to_s]
# @return [Integer]
def estimate_tokens(text)
  text.to_s.length.fdiv(4).ceil
end
|
52
|
+
|
53
|
+
# Picks the table to query: the first allowlisted table, otherwise the name
# of the first table found in the schema, otherwise nil.
def select_table
  if @allow_tables.any?
    @allow_tables.first
  else
    first_table = extract_schema_tables.first
    first_table ? first_table[:name] : nil
  end
end
|
61
|
+
|
62
|
+
# Normalises the schema's tables into [{ name:, columns: }] hashes. Tables are
# read from @schema['tables'] or, failing that, @schema['schema']['tables'];
# entries that are not hashes are skipped.
#
# @return [Array<Hash>]
def extract_schema_tables
  return [] unless @schema.is_a?(Hash)

  nested = @schema['schema']
  raw_tables =
    if @schema['tables'].is_a?(Array)
      @schema['tables']
    elsif nested.is_a?(Hash) && nested['tables'].is_a?(Array)
      nested['tables']
    else
      []
    end

  raw_tables.each_with_object([]) do |table, normalized|
    next unless table.is_a?(Hash)

    normalized << {
      name: table['name'] || table[:name],
      columns: Array(table['columns'] || table[:columns])
    }
  end
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
end
|