code_to_query 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +22 -0
- data/LICENSE.txt +23 -0
- data/README.md +167 -0
- data/lib/code_to_query/compiler.rb +674 -0
- data/lib/code_to_query/configuration.rb +92 -0
- data/lib/code_to_query/context/builder.rb +1087 -0
- data/lib/code_to_query/context/pack.rb +36 -0
- data/lib/code_to_query/errors.rb +5 -0
- data/lib/code_to_query/guardrails/explain_gate.rb +229 -0
- data/lib/code_to_query/guardrails/sql_linter.rb +335 -0
- data/lib/code_to_query/llm_client.rb +46 -0
- data/lib/code_to_query/performance/cache.rb +250 -0
- data/lib/code_to_query/performance/optimizer.rb +396 -0
- data/lib/code_to_query/planner.rb +289 -0
- data/lib/code_to_query/policies/pundit_adapter.rb +71 -0
- data/lib/code_to_query/providers/base.rb +173 -0
- data/lib/code_to_query/providers/local.rb +84 -0
- data/lib/code_to_query/providers/openai.rb +581 -0
- data/lib/code_to_query/query.rb +385 -0
- data/lib/code_to_query/railtie.rb +16 -0
- data/lib/code_to_query/runner.rb +188 -0
- data/lib/code_to_query/validator.rb +203 -0
- data/lib/code_to_query/version.rb +6 -0
- data/lib/code_to_query.rb +90 -0
- data/tasks/code_to_query.rake +326 -0
- metadata +225 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Context::Pack is a serializable bundle of schema, associations, glossary,
|
4
|
+
# join paths, policies, and hints used by the planner.
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
|
8
|
+
module CodeToQuery
|
9
|
+
module Context
|
10
|
+
# Immutable-by-convention value bundle handed to the planner.
#
# Holds the five context sections (schema, models, glossary, policies,
# hints) exactly as they were passed in and can render them as a Hash or
# as pretty-printed JSON.
class Pack
  attr_reader :schema, :models, :glossary, :policies, :hints

  # Every section is a required keyword argument and is stored untouched.
  def initialize(schema:, models:, glossary:, policies:, hints:)
    @schema, @models, @glossary, @policies, @hints =
      schema, models, glossary, policies, hints
  end

  # @return [Hash{Symbol=>Object}] the sections keyed by name, in
  #   declaration order (schema, models, glossary, policies, hints)
  def to_h
    %i[schema models glossary policies hints].each_with_object({}) do |section, bundle|
      bundle[section] = instance_variable_get(:"@#{section}")
    end
  end

  # Pretty-printed JSON form of #to_h; extra arguments are forwarded to
  # JSON.pretty_generate.
  #
  # @return [String]
  def to_json(*args)
    sections = to_h
    JSON.pretty_generate(sections, *args)
  end
end
|
35
|
+
end
|
36
|
+
end
|
@@ -0,0 +1,229 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'json'

begin
  require 'active_record'
rescue LoadError
end
|
7
|
+
|
8
|
+
module CodeToQuery
|
9
|
+
module Guardrails
|
10
|
+
# Guardrail that asks the database for a query plan (EXPLAIN) and refuses
# queries whose plan looks too expensive.
#
# The gate is a no-op (returns true) when ActiveRecord is not loaded or not
# connected, and when no plan could be obtained. Any error raised while
# explaining is logged and resolved according to `config.explain_fail_open`.
#
# Configuration values read from the injected config object:
# adapter, readonly_role, max_query_cost, max_query_rows, allow_seq_scans,
# explain_fail_open.
class ExplainGate
  DEFAULT_MAX_COST = 10_000
  DEFAULT_MAX_ROWS = 100_000

  # Matches a startup OR total cost of four or more integer digits in a
  # downcased text plan, e.g. "cost=1200.00..1300.00" or "cost=0.00..5000.00".
  TEXT_HIGH_COST = /cost=(?:\d+(?:\.\d+)?\.\.)?\d{4,}/.freeze

  # Operations that are only blocked in text plans when a high cost is present.
  TEXT_EXPENSIVE_OPERATIONS = ['sort', 'hash join', 'nested loop'].freeze

  # Patterns applied to plans of adapters without a dedicated analyzer.
  GENERIC_DANGEROUS_PATTERNS = [
    /full.+scan/,
    /seq.+scan/,
    /table.+scan/,
    /sort.+\d{4,}/, # Expensive sorts
    /cost.+\d{4,}/ # High cost operations
  ].freeze

  # @param config [#adapter, #readonly_role, #max_query_cost, ...] gate settings
  def initialize(config)
    @config = config
  end

  # Decides whether +sql+ may run, based on its EXPLAIN plan.
  #
  # @param sql [String] the statement to explain (interpolated verbatim into
  #   the EXPLAIN statement, so it must already have passed linting)
  # @return [Boolean] true when the plan is acceptable or could not be checked
  def allowed?(sql)
    return true unless defined?(ActiveRecord::Base) && ActiveRecord::Base.connected?

    plan = get_explain_plan(sql)
    return true if plan.nil? || plan.empty?

    analyze_plan_safety(plan)
  rescue StandardError => e
    # Log error; fail-open or fail-closed based on configuration
    CodeToQuery.config.logger.warn("[code_to_query] ExplainGate error: #{e.message}")
    !!@config.explain_fail_open
  end

  private

  # Runs EXPLAIN and returns the normalized plan rows.
  def get_explain_plan(sql)
    explain_sql = build_explain_query(sql)

    # `connected_to` lives on ActiveRecord::Base, not on the ActiveRecord
    # module; probing the module meant the readonly role was never used.
    result = if @config.readonly_role && ActiveRecord::Base.respond_to?(:connected_to)
               ActiveRecord::Base.connected_to(role: @config.readonly_role) do
                 ActiveRecord::Base.connection.execute(explain_sql)
               end
             else
               ActiveRecord::Base.connection.execute(explain_sql)
             end

    normalize_explain_result(result)
  end

  # Builds the adapter-specific EXPLAIN statement (never ANALYZE, so the
  # query itself is not executed).
  def build_explain_query(sql)
    case @config.adapter
    when :postgres, :postgresql
      "EXPLAIN (ANALYZE false, BUFFERS false, VERBOSE false, FORMAT JSON) #{sql}"
    when :mysql
      "EXPLAIN FORMAT=JSON #{sql}"
    when :sqlite
      "EXPLAIN QUERY PLAN #{sql}"
    else
      "EXPLAIN #{sql}"
    end
  end

  # Converts a driver result into an Array of rows.
  #
  # Arrays are used as-is. Other enumerable results (driver result objects
  # are typically Enumerable without being Arrays) are expanded with #to_a
  # instead of being collapsed to their #inspect-like string. Anything else,
  # including a bare Hash, becomes a single string row.
  #
  # Each row is kept when it is a Hash, reduced to its first column when it
  # is an Array, and stringified otherwise.
  def normalize_explain_result(result)
    rows = if result.is_a?(Array)
             result
           elsif !result.is_a?(Hash) && result.respond_to?(:each) && result.respond_to?(:to_a)
             result.to_a
           end
    return [result.to_s] unless rows

    rows.map do |row|
      case row
      when Hash then row
      when Array then row.first
      else row.to_s
      end
    end
  end

  # Dispatches to the analyzer matching the configured adapter.
  def analyze_plan_safety(plan)
    case @config.adapter
    when :postgres, :postgresql
      analyze_postgres_plan(plan)
    when :mysql
      analyze_mysql_plan(plan)
    when :sqlite
      analyze_sqlite_plan(plan)
    else
      analyze_generic_plan(plan)
    end
  end

  # Analyzes a PostgreSQL plan. When the first row carries a 'QUERY PLAN'
  # column it is treated as a JSON plan (decoding it first if the driver
  # handed it over as a String); otherwise, or when the column is a String
  # that is not valid JSON, the rows are analyzed as a text plan.
  def analyze_postgres_plan(plan)
    first_row = plan.first
    raw_plan = first_row['QUERY PLAN'] if first_row.is_a?(Hash)
    return analyze_postgres_text_plan(plan) unless raw_plan

    json_plan = decode_json_plan(raw_plan)
    return analyze_postgres_json_plan(json_plan) unless json_plan.nil?

    text_rows = plan.map { |row| row.is_a?(Hash) ? row['QUERY PLAN'] : row }
    analyze_postgres_text_plan(text_rows)
  end

  # Returns the decoded plan, the value itself when it is not a String,
  # or nil when a String value is not valid JSON.
  def decode_json_plan(raw_plan)
    return raw_plan unless raw_plan.is_a?(String)

    JSON.parse(raw_plan)
  rescue JSON::ParserError
    nil
  end

  # Walks a decoded JSON plan; plans with an unexpected shape are allowed.
  def analyze_postgres_json_plan(json_plan)
    return true unless json_plan.is_a?(Array) && json_plan.first.is_a?(Hash)

    root_node = json_plan.first['Plan']
    return true unless root_node

    # Check for expensive operations
    check_node_safety(root_node)
  end

  # Recursively checks one plan node and its children.
  #
  # Blocks when the node's total cost exceeds max_query_cost, when its row
  # estimate exceeds max_query_rows, on 'Seq Scan' nodes estimating more
  # than 1000 rows (unless allow_seq_scans), and on 'Nested Loop' nodes
  # estimating more than 10_000 rows.
  #
  # @return [Boolean] false as soon as any node is rejected
  def check_node_safety(node)
    return true unless node.is_a?(Hash)

    node_type = node['Node Type']
    total_cost = node['Total Cost'].to_f if node['Total Cost']
    plan_rows = node['Plan Rows'].to_i if node['Plan Rows']

    # Block if costs are too high
    max_cost = @config.max_query_cost || DEFAULT_MAX_COST
    if total_cost && total_cost > max_cost
      return blocked("total cost #{total_cost} exceeds limit #{max_cost}")
    end

    # Block if estimated rows are too high
    max_rows = @config.max_query_rows || DEFAULT_MAX_ROWS
    if plan_rows && plan_rows > max_rows
      return blocked("estimated rows #{plan_rows} exceeds limit #{max_rows}")
    end

    # Block dangerous scan types
    case node_type
    when 'Seq Scan'
      # Only allow seq scans on small tables or if explicitly allowed
      if plan_rows && plan_rows > 1000 && !@config.allow_seq_scans
        return blocked("sequential scan on large table (#{plan_rows} rows)")
      end
    when 'Nested Loop'
      # Block nested loops with high row estimates
      if plan_rows && plan_rows > 10_000
        return blocked("expensive nested loop (#{plan_rows} rows)")
      end
    end

    # Recursively check child nodes
    Array(node['Plans']).all? { |child_node| check_node_safety(child_node) }
  end

  # Heuristic analysis of a text-format PostgreSQL plan.
  def analyze_postgres_text_plan(plan)
    plan_text = plan.join("\n").downcase

    if plan_text.include?('seq scan') && !@config.allow_seq_scans
      return blocked('contains sequential scan')
    end

    if plan_text.match?(TEXT_HIGH_COST)
      costly_op = TEXT_EXPENSIVE_OPERATIONS.find { |op| plan_text.include?(op) }
      return blocked("expensive #{costly_op} operation") if costly_op
    end

    true
  end

  # Heuristic analysis of a MySQL plan. Besides the text-format phrases,
  # the JSON-format markers are checked because the EXPLAIN statement is
  # built with FORMAT=JSON (assumes the usual "access_type": "ALL" and
  # "using_filesort": true keys -- confirm against the targeted MySQL version).
  def analyze_mysql_plan(plan)
    plan_text = plan.join("\n").downcase

    if plan_text.include?('full table scan') || plan_text.match?(/"access_type"\s*:\s*"all"/)
      return blocked('full table scan detected')
    end

    if plan_text.include?('using filesort') || plan_text.match?(/"using_filesort"\s*:\s*true/)
      return blocked('filesort operation detected')
    end

    true
  end

  # Blocks SQLite plans that mention a table scan.
  def analyze_sqlite_plan(plan)
    plan_text = plan.join("\n").downcase
    return blocked('table scan detected') if plan_text.include?('scan table')

    true
  end

  # Fallback analysis for unknown adapters: pattern-match the raw plan text.
  def analyze_generic_plan(plan)
    plan_text = plan.join("\n").downcase

    offending = GENERIC_DANGEROUS_PATTERNS.find { |pattern| plan_text.match?(pattern) }
    return blocked("dangerous pattern #{offending} in plan") if offending

    true
  end

  # Logs the rejection reason through the configured logger and returns
  # false so callers can write `return blocked(...)`.
  def blocked(reason)
    CodeToQuery.config.logger.warn("[code_to_query] Query blocked: #{reason}")
    false
  end
end
|
228
|
+
end
|
229
|
+
end
|
@@ -0,0 +1,335 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module CodeToQuery
|
4
|
+
module Guardrails
|
5
|
+
# Regex-based safety linter for generated SQL.
#
# #check! raises SecurityError for anything that is not a single,
# parameterized SELECT statement within the configured limits, and returns
# true otherwise. The checks are heuristics on the whitespace-normalized SQL
# text, not a SQL parser.
#
# Configuration values read from the injected config object:
# adapter, block_subqueries, aggregation_limit, count_limit, distinct_limit,
# require_limit_by_default, max_limit, max_joins.
class SqlLinter
  DANGEROUS_KEYWORDS = %w[
    DROP ALTER CREATE INSERT UPDATE DELETE TRUNCATE
    GRANT REVOKE EXEC EXECUTE CALL
  ].freeze

  # Injection-style patterns rejected for every adapter.
  GENERIC_PATTERNS = [
    /;\s*--/i,
    %r{;\s*/\*}i,
    /'\s*;\s*/i,
    /\bunion\b.*?\bselect\b/i,
    /\bor\s+1\s*=\s*1\b/i,
    /0x[0-9a-f]+/i, # hex literal (often MySQL)
    /'\s*\+\s*'/,
    /'\s*or\s+'/i,
    /'\s*and\s+'/i,
    /'\s*union\s+'/i,
    /'\s*;\s*'/,
    /\\'/,
    /%27/,
    /%3B/,
    %r{/\*.*?\*/}m,
    /--[^\r\n]*/,
    /\#[^\r\n]*/,
    /\bcast\s*\(.*?\bas\b/i,
    /;\s*(drop|create|alter|insert|update|delete)\s+/i,
    /information_schema/i,
    /into\s+outfile/i,
    /into\s+dumpfile/i
  ].freeze

  POSTGRES_PATTERNS = [
    /\|\|/, # concat
    /pg_\w+/i, # pg functions
    /pg_sleep\s*\(/i # time-based
  ].freeze

  MYSQL_PATTERNS = [
    /%20(union|or|and)%20/i,
    /unhex\s*\(/i,
    /benchmark\s*\(/i,
    /sleep\s*\(/i,
    /extractvalue\s*\(/i,
    /updatexml\s*\(/i,
    /into\s+outfile/i,
    /into\s+dumpfile/i,
    /mysql\./i,
    /performance_schema/i
  ].freeze

  SQLSERVER_PATTERNS = [
    /\+\s*SELECT/i,
    /waitfor\s+delay/i,
    /\bxp_\w+\b/i,
    /\bsp_\w+\b/i,
    /\bfn_\w+\b/i,
    /msdb\./i,
    /tempdb\./i
  ].freeze

  SQLITE_PATTERNS = [
    /sqlite_master/i
  ].freeze

  # Extra patterns per adapter symbol; unknown adapters get none.
  ADAPTER_PATTERNS = {
    postgres: POSTGRES_PATTERNS,
    postgresql: POSTGRES_PATTERNS,
    mysql: MYSQL_PATTERNS,
    sqlserver: SQLSERVER_PATTERNS,
    mssql: SQLSERVER_PATTERNS,
    sqlite: SQLITE_PATTERNS
  }.freeze

  DANGEROUS_FUNCTIONS = %w[
    load_file outfile dumpfile
    sys_exec sys_eval
    benchmark sleep pg_sleep
    version user database schema
    current_user current_database current_schema
    inet_server_addr inet_client_addr
  ].freeze

  ENCODING_PATTERNS = [
    /%2[0-9a-f]/i, # URL encoding
    /&#x?[0-9a-f]+;/i, # HTML entity encoding
    /\\x[0-9a-f]{2}/i, # Hex escape sequences
    /\\[0-7]{3}/, # Octal escape sequences
    /\\u[0-9a-f]{4}/i # Unicode escape sequences
  ].freeze

  POLYGLOT_PATTERNS = [
    /select.*from.*information_schema.*union.*select/i,
    /\bor\b.*\bsleep\b.*\band\b.*\bbenchmark\b/i,
    /union.*select.*version\(\).*database\(\)/i,
    /\bif\s*\(\s*1\s*=\s*1\s*,\s*sleep\s*\(/i
  ].freeze

  TIME_DELAY_PATTERNS = [
    /waitfor\s+delay\s+['"]\d+:\d+:\d+['"]/i, # SQL Server specific delay
    /select\s+sleep\s*\(\s*\d+\s*\)/i, # MySQL sleep
    /select\s+pg_sleep\s*\(\s*\d+\s*\)/i, # PostgreSQL sleep
    /benchmark\s*\(\s*\d+\s*,\s*.+?\)/i, # MySQL benchmark
    /\bif\s*\(.+?,\s*sleep\s*\(/i # Conditional time delays
  ].freeze

  LIMIT_PATTERN = /\bLIMIT\s+(\d+)\b/i.freeze

  # A bare or quoted table identifier.
  TABLE_NAME = /`[^`]+`|"[^"]+"|'[^']+'|[a-zA-Z0-9_]+/.freeze
  # Optional alias following a table identifier ("u" or "AS u").
  TABLE_ALIAS = /(?:\s+(?:AS\s+)?[a-zA-Z_][a-zA-Z0-9_]*)?/i.freeze
  # One table reference; group 1 is the (possibly quoted) identifier.
  TABLE_REF = /(#{TABLE_NAME})#{TABLE_ALIAS}/.freeze
  # FROM followed by one or more comma-separated table references.
  FROM_LIST = /\bFROM\s+((?:#{TABLE_NAME})#{TABLE_ALIAS}(?:\s*,\s*(?:#{TABLE_NAME})#{TABLE_ALIAS})*)/i.freeze
  # JOIN (with optional join type) followed by a table identifier.
  JOIN_TABLE = /\b(?:INNER\s+|LEFT\s+|RIGHT\s+|FULL\s+|CROSS\s+)?JOIN\s+(#{TABLE_NAME})/i.freeze

  # @param config [Object] linter settings (see class comment)
  # @param allow_tables [Array<#to_s>, #to_s, nil] optional table allowlist;
  #   when empty or nil the allowlist check is skipped
  def initialize(config, allow_tables: nil)
    @config = config
    # normalize allowlist to lowercase for case-insensitive comparison
    @allow_tables = Array(allow_tables).compact.map { |t| t.to_s.downcase }
  end

  # Runs every check against +sql+ (whitespace runs are collapsed first).
  #
  # @param sql [#to_s] the SQL text to lint
  # @return [true] when all checks pass
  # @raise [SecurityError] on the first failing check
  def check!(sql)
    normalized = sql.to_s.strip.gsub(/\s+/, ' ')

    check_statement_type!(normalized)
    check_dangerous_patterns!(normalized)
    check_required_limit!(normalized)
    check_table_allowlist!(normalized) if @allow_tables.any?
    check_no_literals!(normalized)
    check_no_dangerous_functions!(normalized)
    check_no_subqueries!(normalized) if @config.block_subqueries
    check_join_complexity!(normalized)

    true
  end

  private

  # Requires a single SELECT statement without semicolons or DDL/DML keywords.
  def check_statement_type!(sql)
    raise SecurityError, 'Only SELECT statements are allowed' unless sql.match?(/\A\s*SELECT\b/i)

    raise SecurityError, 'Multiple statements and semicolons are not allowed' if sql.include?(';')

    DANGEROUS_KEYWORDS.each do |keyword|
      raise SecurityError, "Dangerous keyword '#{keyword}' is not allowed" if sql.match?(/\b#{keyword}\b/i)
    end
  end

  # Rejects known injection, encoding, polyglot and time-delay patterns.
  def check_dangerous_patterns!(sql)
    build_dangerous_patterns.each do |pattern|
      raise SecurityError, "Dangerous SQL pattern detected: #{pattern.inspect}" if sql.match?(pattern)
    end

    check_encoding_bypass!(sql)
    check_polyglot_attacks!(sql)
    check_time_delay_patterns!(sql)
  end

  # @return [Array<Regexp>] generic patterns plus those of the configured adapter
  def build_dangerous_patterns
    GENERIC_PATTERNS + ADAPTER_PATTERNS.fetch(@config.adapter, [])
  end

  # Enforces LIMIT rules by query shape: COUNT queries are exempt,
  # aggregation and DISTINCT queries follow their per-type settings, queries
  # containing EXISTS( are exempt, and everything else follows
  # require_limit_by_default. Any LIMIT that is present must respect max_limit.
  def check_required_limit!(sql)
    # COUNT queries: no LIMIT required
    return if is_count_query?(sql)

    # Aggregations (excluding COUNT): if configured, require and enforce per-type limit
    if is_non_count_aggregation_query?(sql)
      unless allows_unlimited_aggregations?
        raise SecurityError, 'LIMIT clause is required for aggregation queries' unless sql.match?(LIMIT_PATTERN)

        enforce_per_type_limit!(sql, @config.aggregation_limit) if @config.aggregation_limit
      end
      enforce_global_max_limit!(sql)
      return
    end

    # DISTINCT queries: if configured, require and enforce per-type limit
    if has_distinct?(sql)
      unless allows_unlimited_distinct?
        raise SecurityError, 'LIMIT clause is required for DISTINCT queries' unless sql.match?(LIMIT_PATTERN)

        enforce_per_type_limit!(sql, @config.distinct_limit) if @config.distinct_limit
      end
      enforce_global_max_limit!(sql)
      return
    end

    # EXISTS: usually controlled by compiler via LIMIT 1; don't enforce here
    return if sql.match?(/\bEXISTS\s*\(/i)

    # Default: require limit for SELECT queries
    if requires_limit?(sql) && !sql.match?(LIMIT_PATTERN)
      raise SecurityError, 'LIMIT clause is required for this query type'
    end

    enforce_global_max_limit!(sql)
  end

  def requires_limit?(_sql)
    # Respect configuration: require LIMIT only if enabled
    !!@config.require_limit_by_default
  end

  # Validates the first LIMIT value against max_limit (default 10_000).
  def enforce_global_max_limit!(sql)
    limit_match = sql.match(LIMIT_PATTERN)
    return unless limit_match

    limit_value = limit_match[1].to_i
    max_limit = @config.max_limit || 10_000

    raise SecurityError, "LIMIT value #{limit_value} exceeds maximum allowed (#{max_limit})" if limit_value > max_limit
    raise SecurityError, 'LIMIT value must be positive' if limit_value <= 0
  end

  # Validates the first LIMIT value against a per-query-type ceiling.
  def enforce_per_type_limit!(sql, per_type_limit)
    return unless per_type_limit

    limit_match = sql.match(LIMIT_PATTERN)
    return unless limit_match

    limit_value = limit_match[1].to_i
    raise SecurityError, "LIMIT value #{limit_value} exceeds per-type maximum (#{per_type_limit})" if limit_value > per_type_limit
  end

  def has_distinct?(sql)
    sql.match?(/\bDISTINCT\b/i)
  end

  def is_count_query?(sql)
    sql.match?(/\bCOUNT\s*\(/i)
  end

  def is_non_count_aggregation_query?(sql)
    sql.match?(/\b(SUM|AVG|MAX|MIN)\s*\(/i)
  end

  def has_count_or_aggregation?(sql)
    sql.match?(/\b(COUNT|SUM|AVG|MAX|MIN)\s*\(/i)
  end

  def allows_unlimited_aggregations?
    @config.aggregation_limit.nil? && @config.count_limit.nil?
  end

  def allows_unlimited_distinct?
    @config.distinct_limit.nil?
  end

  # Rejects any referenced table that is missing from the allowlist.
  def check_table_allowlist!(sql)
    extract_table_names(sql).each do |table|
      next if @allow_tables.include?(table.to_s.downcase)

      raise SecurityError, "Table '#{table}' is not in the allowed list: #{@allow_tables.join(', ')}"
    end
  end

  # Rejects inline string literals and multi-digit numeric literals in WHERE.
  def check_no_literals!(sql)
    # Block string literals (except in very specific contexts)
    # Allow some specific cases like LIKE patterns that might be pre-sanitized
    if sql.match?(/'[^']*'/) && !sql.match?(/\bI?LIKE\s+\$\d+\b/i)
      raise SecurityError, 'String literals are not allowed; use parameterized queries'
    end

    # Block numeric literals in WHERE clauses (except for common safe values)
    where_match = sql.match(/\bWHERE\s+(.+?)(?:\s+ORDER\s+BY|\s+LIMIT|\s+GROUP\s+BY|\Z)/i)
    return unless where_match

    # Drop $n placeholders before looking for literals: their digits are not
    # literals, and a placeholder elsewhere in the clause must not excuse an
    # inline number such as "id = $1 AND age > 12345".
    literal_text = where_match[1].gsub(/\$\d+/, '')
    return unless literal_text.match?(/\b\d{2,}\b/)

    raise SecurityError, 'Numeric literals in WHERE clauses should be parameterized'
  end

  # Rejects calls to file, timing and server-introspection functions.
  def check_no_dangerous_functions!(sql)
    DANGEROUS_FUNCTIONS.each do |func|
      raise SecurityError, "Dangerous function '#{func}' is not allowed" if sql.match?(/\b#{func}\s*\(/i)
    end
  end

  def check_no_subqueries!(sql)
    return unless sql.match?(/\(\s*SELECT\b/i)

    raise SecurityError, 'Subqueries are not allowed in this context'
  end

  # Limits the number of JOINs (max_joins, default 3) and requires join
  # conditions: at least one ON, and ON/USING(...) clauses must be at least
  # as numerous as JOINs. PostgreSQL's "DISTINCT ON" is not counted as a
  # join condition.
  def check_join_complexity!(sql)
    join_count = sql.scan(/\bJOIN\b/i).length
    max_joins = @config.max_joins || 3

    raise SecurityError, "Too many JOINs (#{join_count}); maximum allowed: #{max_joins}" if join_count > max_joins
    return if join_count.zero?

    on_count = sql.gsub(/\bDISTINCT\s+ON\b/i, '').scan(/\bON\b/i).length
    using_count = sql.scan(/\bUSING\s*\(/i).length
    return if on_count.positive? && on_count + using_count >= join_count

    raise SecurityError, 'JOINs must have explicit ON conditions'
  end

  def check_encoding_bypass!(sql)
    ENCODING_PATTERNS.each do |pattern|
      raise SecurityError, "Potential encoding bypass detected: #{pattern.inspect}" if sql.match?(pattern)
    end
  end

  def check_polyglot_attacks!(sql)
    POLYGLOT_PATTERNS.each do |pattern|
      raise SecurityError, "Potential polyglot attack detected: #{pattern.inspect}" if sql.match?(pattern)
    end
  end

  def check_time_delay_patterns!(sql)
    TIME_DELAY_PATTERNS.each do |pattern|
      raise SecurityError, "Time-based attack pattern detected: #{pattern.inspect}" if sql.match?(pattern)
    end
  end

  # Collects table identifiers (quotes removed) referenced after FROM and
  # JOIN, FROM tables first, without duplicates.
  #
  # Every entry of a comma-separated FROM list is returned, so an implicit
  # cross join ("FROM users, secrets") cannot hide a table from the
  # allowlist. Known limitation: a comma-joined table that appears after a
  # JOIN ... ON condition is not detected by these patterns.
  def extract_table_names(sql)
    from_tables = sql.scan(FROM_LIST).flat_map { |(list)| list.scan(TABLE_REF).flatten }
    join_tables = sql.scan(JOIN_TABLE).flatten

    (from_tables + join_tables).map { |name| name.sub(/\A([`"'])(.*)\1\z/m, '\2') }.uniq
  end
end
|
334
|
+
end
|
335
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
module CodeToQuery
|
8
|
+
# Minimal HTTP client for an OpenAI-compatible chat completions endpoint.
class LLMClient
  DEFAULT_API_BASE = 'https://api.openai.com/v1'

  def initialize(config)
    @config = config
  end

  # Sends one chat completion request.
  #
  # messages: [{ role: 'system'|'user'|'assistant', content: '...' }, ...]
  # options:  extra provider-specific options merged into the payload last,
  #           after config.provider_options
  #
  # Returns the first choice's message content (String) or nil when the
  # response has none. Any failure, including a non-2xx response, is raised
  # as CodeToQuery::APIError carrying the underlying message.
  def chat(messages:, options: {})
    endpoint = completions_endpoint
    payload = build_payload(messages, options)
    connection = open_connection(endpoint)
    response = connection.request(build_request(endpoint, payload))

    if response.is_a?(Net::HTTPSuccess)
      JSON.parse(response.body).dig('choices', 0, 'message', 'content')
    else
      raise CodeToQuery::APIError, "LLM API error: #{response.code} #{response.message}"
    end
  rescue StandardError => e
    raise CodeToQuery::APIError, e.message
  end

  private

  # URI of the chat completions endpoint under the configured (or default) base.
  def completions_endpoint
    api_base = (@config.llm_api_base || DEFAULT_API_BASE).to_s
    URI("#{api_base.chomp('/')}/chat/completions")
  end

  # Request body: defaults from config, then provider options, then call options.
  def build_payload(messages, options)
    defaults = {
      model: @config.openai_model,
      messages: messages,
      temperature: @config.llm_temperature
    }
    [@config.provider_options, options].reduce(defaults) do |merged, extra|
      merged.merge(extra || {})
    end
  end

  # Net::HTTP handle for the endpoint; TLS is enabled for https URIs.
  def open_connection(endpoint)
    Net::HTTP.new(endpoint.host, endpoint.port).tap do |http|
      http.use_ssl = (endpoint.scheme == 'https')
      http.read_timeout = @config.llm_timeout
    end
  end

  # JSON POST request; the bearer token is only sent when a key is configured.
  def build_request(endpoint, payload)
    Net::HTTP::Post.new(endpoint).tap do |post|
      api_key = @config.openai_api_key
      post['Authorization'] = "Bearer #{api_key}" if api_key
      post['Content-Type'] = 'application/json'
      post.body = payload.to_json
    end
  end
end
|
46
|
+
end
|