asktive_record 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +19 -1
- data/CHANGELOG.md +52 -1
- data/README.md +196 -114
- data/lib/asktive_record/adapters/base.rb +56 -0
- data/lib/asktive_record/adapters/openai.rb +62 -0
- data/lib/asktive_record/configuration.rb +37 -4
- data/lib/asktive_record/llm_service.rb +33 -56
- data/lib/asktive_record/log.rb +49 -0
- data/lib/asktive_record/model.rb +7 -43
- data/lib/asktive_record/prompt.rb +105 -54
- data/lib/asktive_record/query.rb +37 -26
- data/lib/asktive_record/schema_loader.rb +63 -0
- data/lib/asktive_record/service.rb +4 -50
- data/lib/asktive_record/sql_sanitizer.rb +92 -0
- data/lib/asktive_record/version.rb +1 -1
- data/lib/asktive_record.rb +36 -2
- data/lib/generators/asktive_record/templates/asktive_record_initializer.rb +29 -6
- data/sig/asktive_record.rbs +177 -1
- metadata +18 -29
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "openai"
|
|
4
|
+
require "asktive_record/adapters/base"
|
|
5
|
+
|
|
6
|
+
module AsktiveRecord
|
|
7
|
+
module Adapters
|
|
8
|
+
# Adapter that sends prompts to OpenAI through the ruby-openai client
# (`::OpenAI::Client`) and hands back the plain-text reply.
#
# @example
#   adapter = AsktiveRecord::Adapters::OpenAI.new(
#     api_key: ENV["OPENAI_API_KEY"],
#     model_name: "gpt-4o"
#   )
#   response = adapter.chat("Generate a SQL query...")
class OpenAI < Base
  DEFAULT_MODEL = "gpt-4o-mini"
  DEFAULT_TEMPERATURE = 0.2
  DEFAULT_MAX_TOKENS = 250

  # Both keywords are forwarded unchanged to the parent initializer
  # (bare `super`); the underlying client is created on first use.
  #
  # @param api_key [String] credential passed to the OpenAI client
  # @param model_name [String, nil] optional model override
  def initialize(api_key:, model_name: nil)
    super
    @client = nil
  end

  # Sends a single user message and returns the stripped reply text.
  #
  # @param prompt [String] the prompt to send
  # @param options [Hash] per-call overrides
  # @option options [Float] :temperature (0.2) sampling temperature
  # @option options [Integer] :max_tokens (250) cap on the reply length
  # @return [String, nil] reply text, or nil when the response carries no content
  # @raise [ApiError] when the client raises `::OpenAI::Error`
  def chat(prompt, options = {})
    parameters = {
      model: resolved_model_name,
      messages: [{ role: "user", content: prompt }],
      temperature: options.fetch(:temperature, DEFAULT_TEMPERATURE),
      max_tokens: options.fetch(:max_tokens, DEFAULT_MAX_TOKENS)
    }

    completion = client.chat(parameters: parameters)
    content = completion.dig("choices", 0, "message", "content")
    content&.strip
  rescue ::OpenAI::Error => e
    raise ApiError, "OpenAI API error: #{e.message}"
  end

  # Model used when none was configured explicitly.
  #
  # @return [String]
  def default_model_name
    DEFAULT_MODEL
  end

  private

  # Memoized ruby-openai client authenticated with the adapter's API key.
  def client
    @client ||= ::OpenAI::Client.new(access_token: api_key)
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -2,17 +2,50 @@
|
|
|
2
2
|
|
|
3
3
|
module AsktiveRecord
|
|
4
4
|
# Configuration class for AsktiveRecord.
# Holds the LLM provider, API key, model name, database schema path,
# logging preferences, adapter settings and LLM sampling options.
class Configuration
  attr_accessor :llm_provider, :llm_api_key, :llm_model_name, :db_schema_path,
                :skip_dump_schema, :logger, :read_only, :adapter,
                :temperature, :max_tokens, :cache_enabled

  # Populates every setting with its default value.
  def initialize
    @llm_provider = :openai # Default LLM provider
    @llm_api_key = nil
    @llm_model_name = "gpt-4o-mini" # Default model for OpenAI (gpt-3.5-turbo is deprecated)
    @db_schema_path = "db/schema.rb" # Default path for Rails schema file
    @skip_dump_schema = false # Default is to not skip schema dump
    @logger = nil # Will use AsktiveRecord::Log default if nil
    @read_only = true # Default to read-only mode (SELECT only)
    @adapter = nil # Will be built from llm_provider if nil
    @temperature = 0.2 # Default temperature for LLM
    @max_tokens = 250 # Default max tokens for LLM response
    @cache_enabled = false # Disabled by default
  end

  # Returns the adapter to use for LLM calls.
  # A custom adapter assigned via +adapter=+ is returned as-is; otherwise a
  # new adapter is built from +llm_provider+ on every call (no memoization
  # happens here).
  #
  # @return [AsktiveRecord::Adapters::Base] the configured adapter
  # @raise [ConfigurationError] when +llm_provider+ is not supported
  def resolved_adapter
    return @adapter if @adapter

    build_adapter_from_provider
  end

  private

  # Builds the adapter matching +llm_provider+.
  # The provider is compared case-insensitively on its string form so that
  # both :openai and "openai" (e.g. a value read from ENV or YAML) work.
  def build_adapter_from_provider
    case llm_provider.to_s.downcase
    when "openai"
      # Loaded lazily so the adapter file is only required when it is used.
      require "asktive_record/adapters/openai"
      Adapters::OpenAI.new(api_key: llm_api_key, model_name: llm_model_name)
    else
      raise ConfigurationError,
            "Unsupported LLM provider: #{llm_provider}. " \
            "Supported providers: :openai. Or set a custom adapter via config.adapter."
    end
  end
end
|
|
18
51
|
end
|
|
@@ -1,123 +1,100 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "openai"
|
|
4
3
|
require "asktive_record/prompt"
|
|
4
|
+
require "asktive_record/log"
|
|
5
5
|
|
|
6
6
|
module AsktiveRecord
|
|
7
7
|
# Service class for interacting with the LLM to generate SQL queries and to
# turn query results into natural-language answers.
# All LLM traffic goes through the adapter returned by
# +configuration.resolved_adapter+, so any provider adapter can be plugged in.
class LlmService
  attr_reader :configuration

  # @param configuration [AsktiveRecord::Configuration] gem configuration
  # @raise [ConfigurationError] when the configuration is nil or has no API key
  def initialize(configuration)
    @configuration = configuration
    @adapter = nil

    # NOTE(review): the API key is required even when a custom adapter is
    # configured; confirm whether custom adapters should bypass this check.
    return if @configuration&.llm_api_key

    raise ConfigurationError,
          "LLM API key is not configured. Please set it in config/initializers/asktive_record.rb " \
          "or via environment variable."
  end

  # Placeholder for schema upload/management with the LLM if needed for more advanced scenarios.
  # For instance, if using OpenAI Assistants API or fine-tuning.
  # For now, the schema is passed with each query.
  #
  # @return [true]
  def upload_schema(_schema_string)
    AsktiveRecord::Log.info("Schema upload functionality is a placeholder for now.")
    true
  end

  # Asks the LLM to phrase the database response as an answer to the question.
  #
  # @param question [String] the original natural-language question
  # @param query [String] the SQL that was executed
  # @param response [Object] the database result (logged via #inspect)
  # @return [String, nil] the adapter's reply
  def answer(question, query, response)
    AsktiveRecord::Log.info("Answering question: #{question}")
    AsktiveRecord::Log.debug("Generated SQL query: #{query}")
    AsktiveRecord::Log.debug("Response from database: #{response.inspect}")
    answer_as_human(question, query, response)
  end

  # Method for model-specific queries: generates SQL scoped to +table_name+.
  #
  # @return [String] validated SQL without a trailing semicolon
  def generate_sql(natural_language_query, schema_string, table_name)
    prompt = Prompt.as_sql_generator_for_model(
      natural_language_query,
      schema_string,
      table_name
    )

    generate_and_validate_sql(prompt)
  end

  # Method for service-class-based queries that can target any table.
  # The +_target_table+ argument is accepted but not used.
  #
  # @return [String] validated SQL without a trailing semicolon
  def generate_sql_for_service(natural_language_query, schema_string, _target_table = "any")
    prompt = Prompt.as_sql_generator(natural_language_query, schema_string)
    generate_and_validate_sql(prompt)
  end

  private

  # Sends the "answer as a human" prompt to the adapter.
  # ApiError passes through untouched; any other StandardError is wrapped
  # in QueryGenerationError with a message describing the answering step.
  def answer_as_human(question, query, response)
    prompt = Prompt.as_human_answerer(question, query, response)
    adapter.chat(prompt, llm_options)
  rescue ApiError
    raise
  rescue StandardError => e
    # The message previously said "Failed to generate SQL query", which
    # misdescribed this step: no SQL is generated here.
    raise QueryGenerationError, "Failed to generate answer: #{e.message}"
  end

  # Adapter resolved once from the configuration and reused afterwards.
  def adapter
    @adapter ||= configuration.resolved_adapter
  end

  # Per-call options handed to the adapter, falling back to 0.2 / 250 when
  # the configuration values are nil.
  def llm_options
    {
      temperature: configuration.temperature || 0.2,
      max_tokens: configuration.max_tokens || 250
    }
  end

  # Requests SQL from the adapter, checks it is a SELECT and strips the
  # trailing semicolon. ApiError passes through; any other StandardError
  # raised in this method is re-raised as QueryGenerationError.
  def generate_and_validate_sql(prompt)
    raw_sql = adapter.chat(prompt, llm_options)
    validate_sql_response!(raw_sql)
    clean_sql(raw_sql)
  rescue ApiError
    raise
  rescue StandardError => e
    raise QueryGenerationError, "Failed to generate SQL query: #{e.message}"
  end

  # @raise [QueryGenerationError] when the reply is blank or does not start with SELECT
  def validate_sql_response!(raw_sql)
    raise QueryGenerationError, "LLM did not return a SQL query." if raw_sql.nil? || raw_sql.empty?

    return if raw_sql.strip.match?(/\ASELECT\b/i)

    raise QueryGenerationError, "LLM generated a non-SELECT query: #{raw_sql}"
  end

  # Removes one trailing semicolon (plus trailing whitespace) and trims the result.
  def clean_sql(sql)
    sql.gsub(/;\s*\z/, "").strip
  end
end
|
|
123
100
|
end
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "logger"
|
|
4
|
+
|
|
5
|
+
module AsktiveRecord
|
|
6
|
+
# Provides a configurable logging interface for AsktiveRecord.
# Defaults to Rails.logger when available, or a standard Logger to $stdout
# at INFO level. Every message is prefixed with PREFIX.
#
# Usage:
#   AsktiveRecord::Log.info("Something happened")
#   AsktiveRecord::Log.debug { "Expensive: #{payload.inspect}" }
#
# The block form defers building the message to the logger, so a standard
# ::Logger never evaluates the block when the level is disabled. It relies
# on the configured logger following ::Logger's block convention.
module Log
  PREFIX = "[AsktiveRecord]"

  class << self
    # Assign a custom logger; assigning nil restores the default on next use.
    attr_writer :logger

    # @return [Logger] the assigned logger, or the memoized default
    def logger
      @logger ||= default_logger
    end

    # @param message [#to_s, nil] eager message; omit it and pass a block for lazy evaluation
    def info(message = nil, &block)
      write(:info, message, &block)
    end

    # @param message [#to_s, nil] eager message; omit it and pass a block for lazy evaluation
    def debug(message = nil, &block)
      write(:debug, message, &block)
    end

    # @param message [#to_s, nil] eager message; omit it and pass a block for lazy evaluation
    def warn(message = nil, &block)
      write(:warn, message, &block)
    end

    # @param message [#to_s, nil] eager message; omit it and pass a block for lazy evaluation
    def error(message = nil, &block)
      write(:error, message, &block)
    end

    private

    # Forwards one prefixed entry to the logger at the given level.
    # The block is only used when no eager message was supplied, so existing
    # string-based calls behave exactly as before.
    def write(level, message, &block)
      if message.nil? && block
        logger.public_send(level) { "#{PREFIX} #{block.call}" }
      else
        logger.public_send(level, "#{PREFIX} #{message}")
      end
    end

    def default_logger
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger
      else
        ::Logger.new($stdout, level: ::Logger::INFO)
      end
    end
  end
end
|
|
49
|
+
end
|
data/lib/asktive_record/model.rb
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "asktive_record/llm_service"
|
|
4
|
+
require "asktive_record/schema_loader"
|
|
5
|
+
require "asktive_record/log"
|
|
4
6
|
|
|
5
7
|
module AsktiveRecord
|
|
6
8
|
module Model
|
|
7
9
|
# Provides class-level methods for AsktiveRecord models, enabling natural language queries and configuration checks.
|
|
8
10
|
module ClassMethods
|
|
11
|
+
include AsktiveRecord::SchemaLoader
|
|
12
|
+
|
|
9
13
|
def asktive_record
|
|
10
14
|
return if AsktiveRecord.configuration
|
|
11
15
|
|
|
@@ -15,10 +19,10 @@ module AsktiveRecord
|
|
|
15
19
|
end
|
|
16
20
|
|
|
17
21
|
def ask(natural_language_query)
|
|
18
|
-
|
|
22
|
+
validate_llm_api_key!
|
|
19
23
|
|
|
20
|
-
schema_content =
|
|
21
|
-
|
|
24
|
+
schema_content = load_schema_content
|
|
25
|
+
ensure_schema_is_not_empty!(schema_content)
|
|
22
26
|
|
|
23
27
|
llm_service = AsktiveRecord::LlmService.new(AsktiveRecord.configuration)
|
|
24
28
|
current_table_name = respond_to?(:table_name) ? table_name : name.downcase.pluralize
|
|
@@ -27,46 +31,6 @@ module AsktiveRecord
|
|
|
27
31
|
|
|
28
32
|
AsktiveRecord::Query.new(natural_language_query, raw_sql, self)
|
|
29
33
|
end
|
|
30
|
-
|
|
31
|
-
private
|
|
32
|
-
|
|
33
|
-
def ensure_api_key_configured!
|
|
34
|
-
return if AsktiveRecord.configuration&.llm_api_key
|
|
35
|
-
|
|
36
|
-
raise ConfigurationError, "LLM API key is not configured for AsktiveRecord."
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
def load_schema
|
|
40
|
-
schema_path = AsktiveRecord.configuration.db_schema_path
|
|
41
|
-
return File.read(schema_path) if File.exist?(schema_path)
|
|
42
|
-
|
|
43
|
-
puts "Schema file not found at #{schema_path}. Attempting to generate it."
|
|
44
|
-
try_dump_schema(schema_path) || try_structure_sql || raise_schema_error(schema_path)
|
|
45
|
-
rescue SystemCallError => e
|
|
46
|
-
raise ConfigurationError, "Error reading schema file at #{schema_path}: #{e.message}"
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def try_dump_schema(schema_path)
|
|
50
|
-
return unless defined?(Rails) && !AsktiveRecord.configuration.skip_dump_schema
|
|
51
|
-
|
|
52
|
-
system("bin/rails db:schema:dump")
|
|
53
|
-
File.exist?(schema_path) ? File.read(schema_path) : nil
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
def try_structure_sql
|
|
57
|
-
path = "db/structure.sql"
|
|
58
|
-
return unless File.exist?(path)
|
|
59
|
-
|
|
60
|
-
puts "Using schema from #{path}"
|
|
61
|
-
File.read(path)
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
def raise_schema_error(schema_path)
|
|
65
|
-
raise ConfigurationError, <<~MSG.strip
|
|
66
|
-
Database schema file not found at #{schema_path} or db/structure.sql.
|
|
67
|
-
Please run `asktive_record:setup` or configure the correct path.
|
|
68
|
-
MSG
|
|
69
|
-
end
|
|
70
34
|
end
|
|
71
35
|
end
|
|
72
36
|
end
|
|
@@ -1,72 +1,123 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module AsktiveRecord
|
|
4
|
-
# Prompt class for generating SQL queries from natural language questions
# and for turning query results into human-readable answers.
# User-supplied values pass through a best-effort filter (see escape_input)
# before being interpolated; the schema string is interpolated as given.
class Prompt
  # Phrases commonly used in prompt-injection attempts; each match is
  # replaced with "[FILTERED]".
  PROMPT_INJECTION_PATTERNS = [
    /ignore\s+(all\s+)?previous\s+instructions/i,
    /forget\s+(all\s+)?previous/i,
    /disregard\s+(all\s+)?above/i,
    /you\s+are\s+now/i,
    /new\s+instructions?:/i,
    /system\s*:/i
  ].freeze

  # Maximum number of characters kept from any single interpolated input.
  MAX_INPUT_LENGTH = 2000

  class << self
    # Builds the prompt asking the LLM to answer +question+ from a query result.
    #
    # @param question [String] the user's original question
    # @param query [#to_s] the SQL that was executed
    # @param response [#to_s] the database result
    # @return [String] the prompt text
    def as_human_answerer(question, query, response)
      safe_question = escape_input(question)
      safe_query = escape_input(query.to_s)
      safe_response = escape_input(response.to_s)

      <<~PROMPT
        You are a helpful data assistant. Answer the user's question based on the SQL query result below.
        Keep in mind the language of the question and answer in the same language.

        If the response looks like an ActiveRecord::Result (with @rows), convert it to a human-readable format
        by extracting the relevant data from the rows.

        Question: "#{safe_question}"

        SQL Query that was executed:
        #{safe_query}

        Query Result:
        #{safe_response}

        Please provide a concise answer based on the result as a human would, without any SQL or technical jargon.
        For example:
        - If the result is a list of users: "There are 5 users in the database."
        - If the result is a single record: "The first user is John Doe."
        - If the result is an aggregate: "The average age of users is 30 years."

        Answer in the same language as the question.
      PROMPT
    end

    # Builds the SQL-generation prompt for queries that may touch any table.
    #
    # @param natural_language_query [String] the user's request
    # @param schema_string [String] database schema, interpolated unfiltered
    # @return [String] the prompt text
    def as_sql_generator(natural_language_query, schema_string)
      safe_query = escape_input(natural_language_query)

      <<~PROMPT
        You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
        Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.

        Database Schema:
        ```sql
        #{schema_string}
        ```

        Natural Language Query: "#{safe_query}"

        Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
        You should determine the appropriate table(s) to query from the schema and the natural language query.
        Use JOINs when necessary to query data across multiple tables.

        Examples:
        - If the query is "show me all users", the output should be: SELECT * FROM users;
        - If the query is "find the last 5 registered users", the output should be: SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
        - If the query is "show me products with their categories", the output might be: SELECT products.*, categories.name as category_name FROM products JOIN categories ON products.category_id = categories.id;
        - If the query is "which is the cheapest product", the output might be: SELECT * FROM products ORDER BY price ASC LIMIT 1;

        SQL Query:
      PROMPT
    end

    # Builds the SQL-generation prompt scoped to a single table.
    #
    # @param natural_language_query [String] the user's request
    # @param schema_string [String] database schema, interpolated unfiltered
    # @param table_name [#to_s] table the query should target
    # @return [String] the prompt text
    def as_sql_generator_for_model(natural_language_query, schema_string, table_name)
      safe_query = escape_input(natural_language_query)
      safe_table = escape_input(table_name.to_s)

      <<~PROMPT
        You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
        Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
        The query should be for the table: #{safe_table}.

        Database Schema:
        ```sql
        #{schema_string}
        ```

        Natural Language Query: "#{safe_query}"

        Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
        For example, if the query is "show me all users", and the table is `users`, the output should be:
        SELECT * FROM users;
        If the query is "find the last 5 registered users", the output should be:
        SELECT * FROM users ORDER BY created_at DESC LIMIT 5;

        SQL Query:
      PROMPT
    end

    private

    # Best-effort mitigation against prompt injection.
    # Replaces invalid byte sequences (so the regex filters cannot raise on
    # malformed input), substitutes each known injection phrase with
    # "[FILTERED]", then truncates to MAX_INPUT_LENGTH characters.
    # Quotes, backticks and newlines are NOT escaped.
    #
    # @param input [#to_s, nil] raw value to embed in a prompt
    # @return [String] filtered text; "" when +input+ is nil
    def escape_input(input)
      return "" if input.nil?

      # String#scrub returns a new string, so the caller's object is never mutated.
      filtered = PROMPT_INJECTION_PATTERNS.reduce(input.to_s.scrub) do |text, pattern|
        text.gsub(pattern, "[FILTERED]")
      end

      # Limit input length to prevent token abuse.
      filtered[0, MAX_INPUT_LENGTH]
    end
  end
end
|
|
72
123
|
end
|