code_to_query 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'dry/schema'
4
+
5
module CodeToQuery
  # Validates a planner-produced intent hash before it is compiled to SQL.
  #
  # Validation happens in three stages:
  # 1. the raw intent is checked against IntentSchema,
  # 2. every filter is checked for the fields its operator needs,
  # 3. tables and columns are checked against the caller-supplied table
  #    allowlist and whatever the configured policy adapter reports.
  class Validator
    # Filter operators that describe a correlated subquery rather than a
    # plain column comparison.
    EXISTS_OPS = %w[exists not_exists].freeze

    # Structural contract for an intent. Dry::Schema.Params reads
    # string-keyed input and returns symbol-keyed output, which is why the
    # rest of this class reads keys through #read instead of indexing with
    # string literals.
    IntentSchema = Dry::Schema.Params do
      required(:type).filled(:string)
      required(:table).filled(:string)
      required(:columns).array(:string)
      optional(:filters).array(:hash) do
        optional(:column).maybe(:string)
        required(:op).filled(:string)
        optional(:param).filled(:string)
        optional(:param_start).filled(:string)
        optional(:param_end).filled(:string)
        # Optional fields to support correlated subqueries (NOT EXISTS)
        optional(:related_table).filled(:string)
        optional(:fk_column).filled(:string)
        optional(:base_column).filled(:string)
        optional(:related_filters).array(:hash) do
          required(:column).filled(:string)
          required(:op).filled(:string)
          optional(:param).filled(:string)
          optional(:param_start).filled(:string)
          optional(:param_end).filled(:string)
        end
      end
      optional(:order).array(:hash) do
        required(:column).filled(:string)
        required(:dir).filled(:string)
      end
      optional(:limit).filled(:integer)
      optional(:params).hash
      optional(:distinct).filled(:bool)
      optional(:distinct_on).array(:string)
      optional(:aggregations).array(:hash) do
        required(:type).filled(:string)
        optional(:column).filled(:string)
      end
      optional(:group_by).array(:string)
    end

    # Validates an intent and returns the schema-coerced copy of it.
    #
    # @param intent_hash [Hash] string-keyed intent as produced by the planner
    # @param current_user [Object, nil] forwarded untouched to the policy adapter
    # @param allow_tables [Array<String, Symbol>, nil] optional table allowlist
    # @return [Hash] the schema output (same key style the schema emits), with
    #   the caller's `_metrics` hash re-attached under the '_metrics' key and
    #   `base_column` / `column` defaulted to 'id' on exists/not_exists filters
    # @raise [ArgumentError] when the schema rejects the intent, a filter is
    #   missing required fields, or a table/column is not allowed
    def validate(intent_hash, current_user: nil, allow_tables: nil)
      preprocessed = preprocess_exists_filters(intent_hash)

      if !preprocessed.key?('limit') && CodeToQuery.config.default_limit
        preprocessed = preprocessed.merge('limit' => CodeToQuery.config.default_limit)
      end

      result = IntentSchema.call(preprocessed)
      raise ArgumentError, "Invalid intent: #{result.errors.to_h}" unless result.success?

      # Work on a shallow copy so the schema result object is never mutated.
      validated = result.to_h.dup

      # `_metrics` is not part of the schema, so it is carried over by hand.
      original_metrics = intent_hash['_metrics'] || intent_hash[:_metrics]
      validated['_metrics'] = original_metrics if original_metrics.is_a?(Hash)

      filters_key = key_for(validated, 'filters')
      if filters_key
        validated[filters_key] = Array(validated[filters_key]).each_with_index.map do |filter, idx|
          normalize_filter(filter, idx)
        end
      end

      enforce_allowlists!(validated, current_user: current_user, allow_tables: allow_tables)
      validated
    end

    private

    # Gives exists/not_exists filters a placeholder 'column' before schema
    # validation. Only string-keyed input is inspected; the argument itself
    # is not modified.
    def preprocess_exists_filters(intent_hash)
      intent = intent_hash.dup
      return intent unless intent['filters'].is_a?(Array)

      intent['filters'] = intent['filters'].map do |filter|
        needs_column = filter.is_a?(Hash) && EXISTS_OPS.include?(filter['op'].to_s) && filter['column'].nil?
        needs_column ? filter.merge('column' => 'id') : filter
      end
      intent
    end

    # Checks one validated filter for the fields its operator requires and
    # returns it, with defaults filled in for exists/not_exists filters.
    def normalize_filter(filter, idx)
      op = read(filter, 'op').to_s

      if EXISTS_OPS.include?(op)
        if missing?(read(filter, 'related_table')) || missing?(read(filter, 'fk_column'))
          raise ArgumentError, "Invalid intent: filters[#{idx}] requires related_table and fk_column for #{op}"
        end

        apply_defaults(filter, 'base_column' => 'id', 'column' => 'id')
      else
        raise ArgumentError, "Invalid intent: filters[#{idx}].column must be filled" if missing?(read(filter, 'column'))

        filter
      end
    end

    # Enforces the caller-supplied table allowlist, then the table and column
    # allowlists reported by the policy adapter (when one is configured).
    # Any StandardError raised along the way is re-raised as ArgumentError.
    #
    # NOTE(review): for exists/not_exists filters only the columns of
    # `related_filters` are checked; `related_table`, `fk_column` and
    # `base_column` are not compared against any allowlist here.
    def enforce_allowlists!(intent, current_user:, allow_tables:)
      table = read(intent, 'table')

      # Enforce table allowlist if provided (from user input)
      enforce_table_allowlist!(table, allow_tables, 'not allowed')

      # Consult policy adapter for additional table/column allowlists
      adapter = CodeToQuery.config.policy_adapter
      return unless adapter.respond_to?(:call)

      policy_info = safe_call_policy_adapter(adapter, current_user, table: table, intent: intent)
      policy_tables = Array(policy_info[:allowed_tables] || policy_info['allowed_tables']).map { |t| t.to_s.downcase }
      enforce_table_allowlist!(table, policy_tables, 'not permitted by policy')

      allowed_columns = policy_info[:allowed_columns] || policy_info['allowed_columns'] || {}
      return if allowed_columns.nil? || allowed_columns.empty?

      # Normalize map keys to strings with lowercase table and column names
      normalized = allowed_columns.each_with_object({}) do |(tbl, cols), map|
        map[tbl.to_s.downcase] = Array(cols).map { |c| c.to_s.downcase }
      end

      main_table = table.to_s.downcase
      main_cols = normalized[main_table]

      select_cols = Array(read(intent, 'columns')).reject { |col| col == '*' }
      order_cols = Array(read(intent, 'order')).map { |o| read(o, 'column') }.compact

      enforce_columns!(select_cols, main_cols, main_table, 'selecting')
      enforce_columns!(order_cols, main_cols, main_table, 'ordering by')
      enforce_columns!(Array(read(intent, 'distinct_on')), main_cols, main_table, 'distinct_on')
      enforce_columns!(Array(read(intent, 'group_by')), main_cols, main_table, 'group_by')

      # WHERE filters
      Array(read(intent, 'filters')).each do |filter|
        if EXISTS_OPS.include?(read(filter, 'op').to_s)
          related_table = read(filter, 'related_table')
          related_cols = Array(read(filter, 'related_filters')).map { |rf| read(rf, 'column') }.compact
          enforce_columns!(related_cols, normalized[related_table.to_s.downcase], related_table, 'filter')
        else
          enforce_columns!([read(filter, 'column')].compact, main_cols, main_table, 'filter')
        end
      end
    rescue StandardError => e
      # Re-raise as ArgumentError to keep validator contract
      raise ArgumentError, e.message
    end

    # Raises unless `table` appears (case-insensitively) in `allowlist`.
    # A list with no truthy entries, or a blank table, means nothing to check.
    def enforce_table_allowlist!(table, allowlist, reason)
      entries = Array(allowlist)
      return unless entries.any?
      return if missing?(table)
      return if entries.map { |t| t.to_s.downcase }.include?(table.to_s.downcase)

      raise ArgumentError, "Invalid intent: table '#{table}' #{reason}"
    end

    # Raises on the first column that is not in `permitted`. A nil or empty
    # `permitted` list means the policy does not restrict that table.
    def enforce_columns!(columns, permitted, table_label, action)
      return if permitted.nil? || permitted.empty?

      columns.each do |col|
        next if permitted.include?(col.to_s.downcase)

        raise ArgumentError, "Invalid intent: #{action} column '#{col}' not permitted on '#{table_label}'"
      end
    end

    # Calls the policy adapter with the full keyword set, retrying without
    # `intent:` when the first call raises ArgumentError (which also covers
    # adapters that do not accept that keyword). Any other StandardError from
    # the first call yields an empty policy; errors raised by the retry are
    # not rescued here and propagate to the caller.
    def safe_call_policy_adapter(adapter, current_user, table:, intent:)
      adapter.call(current_user, table: table, intent: intent)
    rescue ArgumentError
      adapter.call(current_user, table: table)
    rescue StandardError
      {}
    end

    # Returns whichever of the string or symbol form of `name` is a key of
    # `hash`, or nil when neither is present.
    def key_for(hash, name)
      [name, name.to_sym].find { |key| hash.key?(key) }
    end

    # Reads `name` from a hash that may be string- or symbol-keyed.
    def read(hash, name)
      key = key_for(hash, name)
      key && hash[key]
    end

    # True when the value is nil or only whitespace once stringified.
    def missing?(value)
      value.to_s.strip.empty?
    end

    # Returns a copy of `hash` in which every nil/false entry named in
    # `defaults` is replaced by its default, written with the same key style
    # (string or symbol) the hash already uses.
    def apply_defaults(hash, defaults)
      symbol_keyed = hash.keys.any? { |key| key.is_a?(Symbol) }

      defaults.each_with_object(hash.dup) do |(name, value), copy|
        key = key_for(copy, name) || (symbol_keyed ? name.to_sym : name)
        copy[key] ||= value
      end
    end
  end
end
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
module CodeToQuery
  # Release number of the code_to_query gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Main entry point for CodeToQuery gem
4
+
5
+ require 'logger'
6
+ require 'active_support'
7
+ require 'active_support/core_ext/object/blank'
8
+ require 'json'
9
+ require 'active_support/core_ext/hash/keys'
10
+
11
+ require_relative 'code_to_query/version'
12
+ require_relative 'code_to_query/configuration'
13
+ require_relative 'code_to_query/providers/base'
14
+ require_relative 'code_to_query/providers/openai'
15
+ require_relative 'code_to_query/providers/local'
16
+ require_relative 'code_to_query/planner'
17
+ require_relative 'code_to_query/validator'
18
+ require_relative 'code_to_query/compiler'
19
+ require_relative 'code_to_query/runner'
20
+ require_relative 'code_to_query/query'
21
+ require_relative 'code_to_query/guardrails/sql_linter'
22
+ require_relative 'code_to_query/guardrails/explain_gate'
23
+ require_relative 'code_to_query/context/pack'
24
+ require_relative 'code_to_query/context/builder'
25
+ require_relative 'code_to_query/performance/cache'
26
+ require_relative 'code_to_query/performance/optimizer'
27
+ require_relative 'code_to_query/llm_client'
28
+ require_relative 'code_to_query/policies/pundit_adapter'
29
+ require_relative 'code_to_query/errors'
30
+ require_relative 'code_to_query/railtie' if defined?(Rails)
31
+
32
module CodeToQuery
  # Base error class declared by the gem's entry point.
  class Error < StandardError; end

  # Backward compatibility for new configuration accessors in older environments/tests
  module BackCompat
    module_function

    # Adds any missing accessors to `config` (on its singleton class only)
    # and seeds them with defaults. Accessors the object already responds to
    # are left alone.
    #
    # @param config [Object] the configuration object to extend in place
    # @return [void]
    # @raise [ArgumentError] when CODE_TO_QUERY_LLM_TIMEOUT or
    #   CODE_TO_QUERY_LLM_TEMPERATURE is set to a non-numeric value
    def ensure_extended_config!(config)
      # Logger
      unless config.respond_to?(:logger)
        class << config
          attr_accessor :logger
        end
        config.logger = default_logger
      end

      # LLM knobs and prompt template
      return if config.respond_to?(:system_prompt_template)

      class << config
        attr_accessor :system_prompt_template, :llm_api_base, :llm_timeout, :llm_temperature, :provider_options
      end
      config.system_prompt_template = nil
      config.llm_api_base = ENV['CODE_TO_QUERY_LLM_API_BASE'] || 'https://api.openai.com/v1'
      config.llm_timeout = Integer(ENV['CODE_TO_QUERY_LLM_TIMEOUT'] || 30)
      config.llm_temperature = Float(ENV['CODE_TO_QUERY_LLM_TEMPERATURE'] || 0.1)
      config.provider_options = {}
    end

    # Picks the Rails logger when one is actually available, otherwise a
    # stdout logger. `Rails.logger` can still be nil while the application is
    # booting, so a nil result falls through to the stdout logger.
    #
    # @return [Object] a non-nil logger
    def default_logger
      rails_logger = Rails.logger if defined?(Rails) && Rails.respond_to?(:logger)
      rails_logger || Logger.new($stdout)
    end
  end

  # Configure the gem
  #
  # @yieldparam settings [Configuration] the shared configuration instance
  # @return [Object] the block's value, or the configuration itself when no
  #   block is given
  def self.configure
    settings = config
    block_given? ? yield(settings) : settings
  end

  # Access the current configuration
  #
  # @return [Configuration] the shared instance, with back-compat accessors ensured
  def self.config
    BackCompat.ensure_extended_config!(Configuration.instance)
    Configuration.instance
  end

  # Convert natural language to SQL query
  # current_user is optional and only used if a policy_adapter requires it
  #
  # Pipeline: Planner -> Validator -> Compiler -> SqlLinter -> Query.
  #
  # @param prompt [String] the natural-language request
  # @param schema [Object, nil] forwarded to the planner
  # @param allow_tables [Array, nil] table allowlist forwarded to the planner,
  #   validator, linter and resulting query
  # @param current_user [Object, nil] forwarded to the validator and compiler
  # @return [Query]
  def self.ask(prompt:, schema: nil, allow_tables: nil, current_user: nil)
    intent = Planner.new(config).plan(prompt: prompt, schema: schema, allow_tables: allow_tables)
    validated_intent = Validator.new.validate(intent, current_user: current_user, allow_tables: allow_tables).deep_stringify_keys
    compiled = Compiler.new(config).compile(validated_intent, current_user: current_user)

    Guardrails::SqlLinter.new(config, allow_tables: allow_tables).check!(compiled[:sql])

    Query.new(sql: compiled[:sql], params: compiled[:params], bind_spec: compiled[:bind_spec],
              intent: validated_intent, allow_tables: allow_tables, config: config)
  end
end
@@ -0,0 +1,326 @@
1
+ # frozen_string_literal: true
2
+
3
# Standard-library pieces the helpers below call directly
# (FileUtils.mkdir_p, JSON.parse / JSON.pretty_generate, Time#iso8601).
require 'fileutils'
require 'json'
require 'time'

# Rake tasks that build, inspect and verify the CodeToQuery context pack
# (the JSON file located at CodeToQuery.config.context_pack_path).
namespace :code_to_query do
  desc 'Bootstrap complete context pack (schema + app scan + glossary + policies)'
  task bootstrap: :environment do
    require 'code_to_query'

    puts 'Starting CodeToQuery bootstrap process...'

    begin
      pack = CodeToQuery::Context::Builder.bootstrap!

      puts 'Context pack created.'
      puts "Location: #{CodeToQuery.config.context_pack_path}"
      puts "Tables: #{pack.schema[:tables]&.length || 0}"
      puts "Models: #{pack.models[:models]&.length || 0}"
      puts "Glossary terms: #{pack.glossary&.length || 0}"

      # Run verification
      Rake::Task['code_to_query:verify'].invoke
    rescue StandardError => e
      puts "Bootstrap failed: #{e.message}"
      puts 'Try running individual tasks to debug:'
      puts ' rake code_to_query:schema'
      puts ' rake code_to_query:scan_app'
      puts ' rake code_to_query:glossary'
      puts ' rake code_to_query:policies'
      exit 1
    end
  end

  desc 'Extract schema information from database'
  task schema: :environment do
    require 'code_to_query'

    puts 'Extracting database schema...'

    begin
      schema = CodeToQuery::Context::Builder.new.extract_schema
      update_pack_section('schema', schema)

      puts 'Schema extracted.'
      puts "Tables: #{schema[:tables]&.length || 0}"
      puts "Adapter: #{schema[:adapter]}"
      puts "Version: #{schema[:version]}"
    rescue StandardError => e
      puts "Schema extraction failed: #{e.message}"
      exit 1
    end
  end

  desc 'Scan Rails application models and associations'
  task scan_app: :environment do
    require 'code_to_query'

    puts 'Scanning Rails application...'

    begin
      app_data = CodeToQuery::Context::Builder.new.scan_app
      update_pack_section('models', app_data)

      puts 'Application scan completed.'
      puts "Models: #{app_data[:models]&.length || 0}"
      puts "Associations: #{app_data[:associations]&.length || 0}"
    rescue StandardError => e
      puts "Application scan failed: #{e.message}"
      exit 1
    end
  end

  desc 'Generate business glossary and synonyms'
  task glossary: :environment do
    require 'code_to_query'

    puts 'Generating business glossary...'

    begin
      glossary = CodeToQuery::Context::Builder.new.generate_glossary
      update_pack_section('glossary', glossary)

      puts 'Glossary generated.'
      puts "Terms: #{glossary&.length || 0}"
    rescue StandardError => e
      puts "Glossary generation failed: #{e.message}"
      exit 1
    end
  end

  desc 'Collect security policies and access rules'
  task policies: :environment do
    require 'code_to_query'

    puts 'Collecting security policies...'

    begin
      policies = CodeToQuery::Context::Builder.new.collect_policies
      update_pack_section('policies', policies)

      puts 'Policies collected.'
    rescue StandardError => e
      puts "Policy collection failed: #{e.message}"
      exit 1
    end
  end

  desc 'Verify context pack integrity and completeness'
  task verify: :environment do
    require 'code_to_query'

    puts 'Verifying context pack...'

    begin
      CodeToQuery::Context::Builder.new.verify!

      # Additional comprehensive verification
      pack_data = load_existing_pack

      # Check schema completeness
      schema = pack_data['schema'] || {}
      tables = schema['tables'] || []

      puts 'Context pack verification passed.'
      puts "Schema: #{tables.length} tables"

      if pack_data['models']
        models = pack_data['models']['models'] || {}
        puts "Models: #{models.length} models"
      end

      puts "Glossary: #{pack_data['glossary'].length} terms" if pack_data['glossary']

      # Check for potential issues
      warnings = []
      warnings << 'No tables found in schema' if tables.empty?
      warnings << 'No models found' if pack_data.dig('models', 'models')&.empty?
      warnings << 'No glossary terms' if pack_data['glossary']&.empty?

      if warnings.any?
        puts "\nWarnings:"
        warnings.each { |warning| puts " - #{warning}" }
      end

      puts "\nContext pack location: #{CodeToQuery.config.context_pack_path}"
    rescue StandardError => e
      puts "Verification failed: #{e.message}"
      exit 1
    end
  end

  desc 'Show context pack statistics and information'
  task info: :environment do
    require 'code_to_query'

    puts 'CodeToQuery Context Pack Information'
    puts '=' * 50

    begin
      pack_data = load_existing_pack

      puts "Location: #{CodeToQuery.config.context_pack_path}"
      puts "Last updated: #{pack_data['updated_at'] || 'Unknown'}"
      puts

      # Schema info
      if pack_data['schema']
        schema = pack_data['schema']
        puts 'Database Schema:'
        puts " Adapter: #{schema['adapter']}"
        puts " Version: #{schema['version']}"
        puts " Tables: #{schema['tables']&.length || 0}"

        if schema['tables']&.any?
          puts ' Sample tables:'
          schema['tables'].first(5).each do |table|
            column_count = table['columns']&.length || 0
            puts " - #{table['name']} (#{column_count} columns)"
          end
          puts ' ...' if schema['tables'].length > 5
        end
        puts
      end

      # Models info
      if pack_data['models']
        models_data = pack_data['models']
        models = models_data['models'] || {}
        associations = models_data['associations'] || {}

        puts 'Rails Models:'
        puts " Models: #{models.length}"
        puts " With associations: #{associations.length}"

        if models.any?
          puts ' Sample models:'
          models.first(5).each do |name, info|
            puts " - #{name} → #{info['table_name']}"
          end
          puts ' ...' if models.length > 5
        end
        puts
      end

      # Glossary info
      if pack_data['glossary']
        glossary = pack_data['glossary']
        puts 'Business Glossary:'
        puts " Terms: #{glossary.length}"

        if glossary.any?
          puts ' Sample terms:'
          glossary.first(5).each do |term, synonyms|
            puts " - #{term}: #{Array(synonyms).join(', ')}"
          end
          puts ' ...' if glossary.length > 5
        end
        puts
      end

      # Configuration info
      config = CodeToQuery.config
      puts 'Configuration:'
      puts " Database adapter: #{config.adapter}"
      puts " Default limit: #{config.default_limit}"
      puts " Max limit: #{config.max_limit}"
      puts " Readonly role: #{config.readonly_role || 'None'}"
      puts " OpenAI model: #{config.openai_model}"
      puts " Explain gate: #{config.enable_explain_gate ? 'Enabled' : 'Disabled'}"
    rescue StandardError => e
      puts "Could not load context pack: #{e.message}"
      puts " Run 'rake code_to_query:bootstrap' to create it."
    end
  end

  desc 'Clean and rebuild context pack'
  task rebuild: :environment do
    require 'code_to_query'

    puts 'Rebuilding context pack...'

    # Remove existing pack
    if File.exist?(CodeToQuery.config.context_pack_path)
      File.delete(CodeToQuery.config.context_pack_path)
      puts 'Removed existing context pack'
    end

    # Run bootstrap
    Rake::Task['code_to_query:bootstrap'].invoke
  end

  desc 'Test query generation with sample prompts'
  task test: :environment do
    require 'code_to_query'

    puts 'Testing CodeToQuery with sample prompts...'

    sample_prompts = [
      'Show me all users created this month',
      'Top 10 orders by amount',
      'Find invoices from July 2023',
      'Count active customers',
      'Recent payments over $1000'
    ]

    sample_prompts.each_with_index do |prompt, index|
      puts "\n#{index + 1}. Testing: \"#{prompt}\""

      begin
        query = CodeToQuery.ask(prompt: prompt)
        puts " SQL: #{query.sql}"
        puts " Params: #{query.params.inspect}"
        puts " Safe: #{query.safe?}"
      rescue StandardError => e
        puts " Error: #{e.message}"
      end
    end

    puts "\nTest completed."
  end

  private

  # Reads the context pack from disk.
  #
  # @return [Hash] the parsed pack; an empty hash when the file is missing,
  #   is not valid JSON, or parses to something other than a JSON object
  #   (callers index the result with string keys, so only a Hash is usable)
  def load_existing_pack
    path = CodeToQuery.config.context_pack_path
    return {} unless File.exist?(path)

    parsed = JSON.parse(File.read(path))
    return parsed if parsed.is_a?(Hash)

    puts 'Existing context pack is not a JSON object, starting fresh'
    {}
  rescue JSON::ParserError
    puts 'Existing context pack has invalid JSON, starting fresh'
    {}
  end

  # Serialises `data` as pretty-printed JSON at the context pack path,
  # creating the parent directory first when it does not exist.
  def write_pack_data(data)
    path = CodeToQuery.config.context_pack_path
    FileUtils.mkdir_p(File.dirname(path))

    File.write(path, JSON.pretty_generate(data))
  end

  # Replaces one top-level section of the context pack, refreshes the
  # 'updated_at' stamp and writes the pack back to disk. Shared by the
  # schema, scan_app, glossary and policies tasks so they all stamp the
  # pack the same way.
  #
  # @param section [String] top-level key to overwrite
  # @param value [Object] JSON-serialisable content for that key
  # @return [Hash] the pack data that was written
  def update_pack_section(section, value)
    pack_data = load_existing_pack
    pack_data[section] = value
    pack_data['updated_at'] = Time.now.iso8601

    write_pack_data(pack_data)
    pack_data
  end
end