asktive_record 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "openai"
4
+ require "asktive_record/adapters/base"
5
+
6
+ module AsktiveRecord
7
+ module Adapters
8
+ # OpenAI adapter for AsktiveRecord. Wraps the ruby-openai gem to provide
9
+ # LLM communication capabilities.
10
+ #
11
+ # @example
12
+ # adapter = AsktiveRecord::Adapters::OpenAI.new(
13
+ # api_key: ENV["OPENAI_API_KEY"],
14
+ # model_name: "gpt-4o"
15
+ # )
16
+ # response = adapter.chat("Generate a SQL query...")
17
+ class OpenAI < Base
18
+ DEFAULT_MODEL = "gpt-4o-mini"
19
+ DEFAULT_TEMPERATURE = 0.2
20
+ DEFAULT_MAX_TOKENS = 250
21
+
22
+ def initialize(api_key:, model_name: nil)
23
+ super
24
+ @client = nil
25
+ end
26
+
27
+ # Send a prompt to OpenAI and return the text response.
28
+ #
29
+ # @param prompt [String] the prompt to send
30
+ # @param options [Hash] additional options
31
+ # @option options [Float] :temperature (0.2) the temperature for response generation
32
+ # @option options [Integer] :max_tokens (250) the maximum tokens in the response
33
+ # @return [String, nil] the text response from the LLM
34
+ def chat(prompt, options = {})
35
+ response = client.chat(
36
+ parameters: {
37
+ model: resolved_model_name,
38
+ messages: [{ role: "user", content: prompt }],
39
+ temperature: options.fetch(:temperature, DEFAULT_TEMPERATURE),
40
+ max_tokens: options.fetch(:max_tokens, DEFAULT_MAX_TOKENS)
41
+ }
42
+ )
43
+ response.dig("choices", 0, "message", "content")&.strip
44
+ rescue ::OpenAI::Error => e
45
+ raise ApiError, "OpenAI API error: #{e.message}"
46
+ end
47
+
48
+ # Returns the default model name for OpenAI.
49
+ #
50
+ # @return [String]
51
+ def default_model_name
52
+ DEFAULT_MODEL
53
+ end
54
+
55
+ private
56
+
57
+ def client
58
+ @client ||= ::OpenAI::Client.new(access_token: api_key)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -2,17 +2,50 @@
2
2
 
3
3
  module AsktiveRecord
4
4
  # Configuration class for AsktiveRecord
5
- # This class holds the configuration settings for the LLM provider, API key, model name
6
- # and database schema path.
5
+ # This class holds the configuration settings for the LLM provider, API key, model name,
6
+ # database schema path, logging preferences, and adapter settings.
7
7
  class Configuration
8
- attr_accessor :llm_provider, :llm_api_key, :llm_model_name, :db_schema_path, :skip_dump_schema
8
+ attr_accessor :llm_provider, :llm_api_key, :llm_model_name, :db_schema_path,
9
+ :skip_dump_schema, :logger, :read_only, :adapter,
10
+ :temperature, :max_tokens, :cache_enabled
9
11
 
10
12
  def initialize
11
13
  @llm_provider = :openai # Default LLM provider
12
14
  @llm_api_key = nil
13
- @llm_model_name = "gpt-3.5-turbo" # Default model for OpenAI
15
+ @llm_model_name = "gpt-4o-mini" # Default model for OpenAI (gpt-3.5-turbo is deprecated)
14
16
  @db_schema_path = "db/schema.rb" # Default path for Rails schema file
15
17
  @skip_dump_schema = false # Default is to not skip schema dump
18
+ @logger = nil # Will use AsktiveRecord::Log default if nil
19
+ @read_only = true # Default to read-only mode (SELECT only)
20
+ @adapter = nil # Will be built from llm_provider if nil
21
+ @temperature = 0.2 # Default temperature for LLM
22
+ @max_tokens = 250 # Default max tokens for LLM response
23
+ @cache_enabled = false # Disabled by default
24
+ end
25
+
26
+ # Builds and returns the appropriate adapter based on configuration.
27
+ # If a custom adapter is set, returns it directly.
28
+ # Otherwise, builds one from llm_provider setting.
29
+ #
30
+ # @return [AsktiveRecord::Adapters::Base] the configured adapter
31
+ def resolved_adapter
32
+ return @adapter if @adapter
33
+
34
+ build_adapter_from_provider
35
+ end
36
+
37
+ private
38
+
39
+ def build_adapter_from_provider
40
+ case llm_provider
41
+ when :openai
42
+ require "asktive_record/adapters/openai"
43
+ Adapters::OpenAI.new(api_key: llm_api_key, model_name: llm_model_name)
44
+ else
45
+ raise ConfigurationError,
46
+ "Unsupported LLM provider: #{llm_provider}. " \
47
+ "Supported providers: :openai. Or set a custom adapter via config.adapter."
48
+ end
16
49
  end
17
50
  end
18
51
  end
@@ -1,123 +1,100 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "openai"
4
3
  require "asktive_record/prompt"
4
+ require "asktive_record/log"
5
5
 
6
6
  module AsktiveRecord
7
7
  # Service class for interacting with the LLM API to generate SQL queries
8
8
  # and answer questions based on the generated queries and database responses.
9
+ # Uses the adapter pattern to support multiple LLM providers.
9
10
  class LlmService
10
11
  attr_reader :configuration
11
12
 
12
13
  def initialize(configuration)
13
14
  @configuration = configuration
15
+ @adapter = nil
16
+
14
17
  return if @configuration&.llm_api_key
15
18
 
16
19
  raise ConfigurationError,
17
- "LLM API key is not configured. Please set it in config/initializers/asktive_record.rb\
18
- or via environment variable."
20
+ "LLM API key is not configured. Please set it in config/initializers/asktive_record.rb " \
21
+ "or via environment variable."
19
22
  end
20
23
 
21
- # Placeholder for schema upload/management with the LLM if needed for more advanced scenarios
24
+ # Placeholder for schema upload/management with the LLM if needed for more advanced scenarios.
22
25
  # For instance, if using OpenAI Assistants API or fine-tuning.
23
26
  # For now, the schema is passed with each query.
24
27
  def upload_schema(_schema_string)
25
- # This could be used to upload schema to a vector store or a fine-tuning dataset in the future.
26
- puts "Schema upload functionality is a placeholder for now."
28
+ AsktiveRecord::Log.info("Schema upload functionality is a placeholder for now.")
27
29
  true
28
30
  end
29
31
 
30
32
  def answer(question, query, response)
31
- puts "Answering question: #{question}"
32
- puts "Generated SQL query: #{query}"
33
- puts "Response from database: #{response.inspect}"
33
+ AsktiveRecord::Log.info("Answering question: #{question}")
34
+ AsktiveRecord::Log.debug("Generated SQL query: #{query}")
35
+ AsktiveRecord::Log.debug("Response from database: #{response.inspect}")
34
36
  answer_as_human(question, query, response)
35
37
  end
36
38
 
37
- # Original method for model-specific queries
39
+ # Method for model-specific queries
38
40
  def generate_sql(natural_language_query, schema_string, table_name)
39
- client = OpenAI::Client.new(access_token: configuration.llm_api_key)
40
-
41
41
  prompt = Prompt.as_sql_generator_for_model(
42
42
  natural_language_query,
43
43
  schema_string,
44
44
  table_name
45
45
  )
46
46
 
47
- generate_and_validate_sql(client, prompt)
47
+ generate_and_validate_sql(prompt)
48
48
  end
49
49
 
50
- # New method for service-class-based queries that can target any table
50
+ # Method for service-class-based queries that can target any table
51
51
  def generate_sql_for_service(natural_language_query, schema_string, _target_table = "any")
52
- client = OpenAI::Client.new(access_token: configuration.llm_api_key)
53
52
  prompt = Prompt.as_sql_generator(natural_language_query, schema_string)
54
- generate_and_validate_sql(client, prompt)
53
+ generate_and_validate_sql(prompt)
55
54
  end
56
55
 
57
56
  private
58
57
 
59
58
  def answer_as_human(question, query, response)
60
59
  prompt = Prompt.as_human_answerer(question, query, response)
61
- client = build_client
62
- llm_response = call_llm(client, prompt)
63
- extract_answer(llm_response)
64
- rescue OpenAI::Error => e
65
- raise ApiError, "OpenAI API error: #{e.message}"
60
+ adapter.chat(prompt, llm_options)
61
+ rescue ApiError
62
+ raise
66
63
  rescue StandardError => e
67
64
  raise QueryGenerationError, "Failed to generate SQL query: #{e.message}"
68
65
  end
69
66
 
70
- def build_client
71
- OpenAI::Client.new(access_token: configuration.llm_api_key)
72
- end
73
-
74
- def call_llm(client, prompt)
75
- client.chat(
76
- parameters: {
77
- model: configuration.llm_model_name || "gpt-3.5-turbo",
78
- messages: [{ role: "user", content: prompt }],
79
- temperature: 0.2,
80
- max_tokens: 250
81
- }
82
- )
67
+ def adapter
68
+ @adapter ||= configuration.resolved_adapter
83
69
  end
84
70
 
85
- def extract_answer(response)
86
- response.dig("choices", 0, "message", "content")&.strip
71
+ def llm_options
72
+ {
73
+ temperature: configuration.temperature || 0.2,
74
+ max_tokens: configuration.max_tokens || 250
75
+ }
87
76
  end
88
77
 
89
- def generate_and_validate_sql(client, prompt)
90
- raw_sql = fetch_sql_from_llm(client, prompt)
78
+ def generate_and_validate_sql(prompt)
79
+ raw_sql = adapter.chat(prompt, llm_options)
91
80
  validate_sql_response!(raw_sql)
92
- sanitize_sql(raw_sql)
93
- rescue OpenAI::Error => e
94
- raise ApiError, "OpenAI API error: #{e.message}"
81
+ clean_sql(raw_sql)
82
+ rescue ApiError
83
+ raise
95
84
  rescue StandardError => e
96
85
  raise QueryGenerationError, "Failed to generate SQL query: #{e.message}"
97
86
  end
98
87
 
99
- def fetch_sql_from_llm(client, prompt)
100
- response = client.chat(
101
- parameters: {
102
- model: configuration.llm_model_name || "gpt-3.5-turbo",
103
- messages: [{ role: "user", content: prompt }],
104
- temperature: 0.2,
105
- max_tokens: 250
106
- }
107
- )
108
- response.dig("choices", 0, "message", "content")&.strip
109
- end
110
-
111
88
  def validate_sql_response!(raw_sql)
112
89
  raise QueryGenerationError, "LLM did not return a SQL query." if raw_sql.nil? || raw_sql.empty?
113
90
 
114
- return if raw_sql.downcase.start_with?("select")
91
+ return if raw_sql.strip.match?(/\ASELECT\b/i)
115
92
 
116
93
  raise QueryGenerationError, "LLM generated a non-SELECT query: #{raw_sql}"
117
94
  end
118
95
 
119
- def sanitize_sql(sql)
120
- sql.chomp(";")
96
+ def clean_sql(sql)
97
+ sql.gsub(/;\s*\z/, "").strip
121
98
  end
122
99
  end
123
100
  end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "logger"
4
+
5
+ module AsktiveRecord
6
+ # Provides a configurable logging interface for AsktiveRecord.
7
+ # Defaults to Rails.logger when available, or a standard Logger to $stdout.
8
+ #
9
+ # Usage:
10
+ # AsktiveRecord::Log.info("Something happened")
11
+ # AsktiveRecord::Log.debug("Debug info")
12
+ module Log
13
+ PREFIX = "[AsktiveRecord]"
14
+
15
+ class << self
16
+ def logger
17
+ @logger ||= default_logger
18
+ end
19
+
20
+ attr_writer :logger
21
+
22
+ def info(message)
23
+ logger.info("#{PREFIX} #{message}")
24
+ end
25
+
26
+ def debug(message)
27
+ logger.debug("#{PREFIX} #{message}")
28
+ end
29
+
30
+ def warn(message)
31
+ logger.warn("#{PREFIX} #{message}")
32
+ end
33
+
34
+ def error(message)
35
+ logger.error("#{PREFIX} #{message}")
36
+ end
37
+
38
+ private
39
+
40
+ def default_logger
41
+ if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
42
+ Rails.logger
43
+ else
44
+ ::Logger.new($stdout, level: ::Logger::INFO)
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end
@@ -1,11 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "asktive_record/llm_service"
4
+ require "asktive_record/schema_loader"
5
+ require "asktive_record/log"
4
6
 
5
7
  module AsktiveRecord
6
8
  module Model
7
9
  # Provides class-level methods for AsktiveRecord models, enabling natural language queries and configuration checks.
8
10
  module ClassMethods
11
+ include AsktiveRecord::SchemaLoader
12
+
9
13
  def asktive_record
10
14
  return if AsktiveRecord.configuration
11
15
 
@@ -15,10 +19,10 @@ module AsktiveRecord
15
19
  end
16
20
 
17
21
  def ask(natural_language_query)
18
- ensure_api_key_configured!
22
+ validate_llm_api_key!
19
23
 
20
- schema_content = load_schema
21
- raise ConfigurationError, "Schema content is empty." if schema_content.to_s.strip.empty?
24
+ schema_content = load_schema_content
25
+ ensure_schema_is_not_empty!(schema_content)
22
26
 
23
27
  llm_service = AsktiveRecord::LlmService.new(AsktiveRecord.configuration)
24
28
  current_table_name = respond_to?(:table_name) ? table_name : name.downcase.pluralize
@@ -27,46 +31,6 @@ module AsktiveRecord
27
31
 
28
32
  AsktiveRecord::Query.new(natural_language_query, raw_sql, self)
29
33
  end
30
-
31
- private
32
-
33
- def ensure_api_key_configured!
34
- return if AsktiveRecord.configuration&.llm_api_key
35
-
36
- raise ConfigurationError, "LLM API key is not configured for AsktiveRecord."
37
- end
38
-
39
- def load_schema
40
- schema_path = AsktiveRecord.configuration.db_schema_path
41
- return File.read(schema_path) if File.exist?(schema_path)
42
-
43
- puts "Schema file not found at #{schema_path}. Attempting to generate it."
44
- try_dump_schema(schema_path) || try_structure_sql || raise_schema_error(schema_path)
45
- rescue SystemCallError => e
46
- raise ConfigurationError, "Error reading schema file at #{schema_path}: #{e.message}"
47
- end
48
-
49
- def try_dump_schema(schema_path)
50
- return unless defined?(Rails) && !AsktiveRecord.configuration.skip_dump_schema
51
-
52
- system("bin/rails db:schema:dump")
53
- File.exist?(schema_path) ? File.read(schema_path) : nil
54
- end
55
-
56
- def try_structure_sql
57
- path = "db/structure.sql"
58
- return unless File.exist?(path)
59
-
60
- puts "Using schema from #{path}"
61
- File.read(path)
62
- end
63
-
64
- def raise_schema_error(schema_path)
65
- raise ConfigurationError, <<~MSG.strip
66
- Database schema file not found at #{schema_path} or db/structure.sql.
67
- Please run `asktive_record:setup` or configure the correct path.
68
- MSG
69
- end
70
34
  end
71
35
  end
72
36
  end
@@ -1,72 +1,123 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module AsktiveRecord
4
- # Prompt class for generating SQL queries from natural language questions
4
+ # Prompt class for generating SQL queries from natural language questions.
5
+ # All user inputs are escaped before interpolation to mitigate prompt injection.
5
6
  class Prompt
6
- def self.as_human_answerer(question, query, response)
7
- <<~PROMPT
8
- Keep in mind the language of the question is in "#{question}".
9
- If thre responses seems like an ActiveRerecord::Result because probably it was running as inspec
10
- to be passed here, please convert it to a human-readable format. For example, get the @rows in the string
11
- and convert it to a human-readable format.
12
- Based on the provided schema, I ask about the following question:
13
- "#{question}" and you give me the following SQL generated:
14
- #{query}. So I executed the query and got the following result in my database:
15
- #{response}.
16
- Now I need you to answer the question based on what I asked you and the result I got.
17
- Please provide a concise answer based on the result as a human would, without any SQL or technical jargon.
18
- E.g. if the result is a list of users, you might say "There are 5 users in the database." or "The first user is John Doe." or "The average age of users is 30 years." depending on the context of the question.
19
- Answer in the same language as the question was asked in "#{question}"
20
- PROMPT
21
- end
7
+ # Phrases commonly used in prompt-injection attempts; escape_input replaces each match with "[FILTERED]"
8
+ PROMPT_INJECTION_PATTERNS = [
9
+ /ignore\s+(all\s+)?previous\s+instructions/i,
10
+ /forget\s+(all\s+)?previous/i,
11
+ /disregard\s+(all\s+)?above/i,
12
+ /you\s+are\s+now/i,
13
+ /new\s+instructions?:/i,
14
+ /system\s*:/i
15
+ ].freeze
22
16
 
23
- def self.as_sql_generator(natural_language_query, schema_string)
24
- <<~PROMPT
25
- You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
26
- Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
17
+ class << self
18
+ def as_human_answerer(question, query, response)
19
+ safe_question = escape_input(question)
20
+ safe_query = escape_input(query.to_s)
21
+ safe_response = escape_input(response.to_s)
27
22
 
28
- Database Schema:
29
- ```sql
30
- #{schema_string}
31
- ```
23
+ <<~PROMPT
24
+ You are a helpful data assistant. Answer the user's question based on the SQL query result below.
25
+ Keep in mind the language of the question and answer in the same language.
32
26
 
33
- Natural Language Query: "#{natural_language_query}"
27
+ If the response looks like an ActiveRecord::Result (with @rows), convert it to a human-readable format
28
+ by extracting the relevant data from the rows.
34
29
 
35
- Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
36
- You should determine the appropriate table(s) to query from the schema and the natural language query.
37
- Use JOINs when necessary to query data across multiple tables.
30
+ Question: "#{safe_question}"
38
31
 
39
- Examples:
40
- - If the query is "show me all users", the output should be: SELECT * FROM users;
41
- - If the query is "find the last 5 registered users", the output should be: SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
42
- - If the query is "show me products with their categories", the output might be: SELECT products.*, categories.name as category_name FROM products JOIN categories ON products.category_id = categories.id;
43
- - If the query is "which is the cheapest product", the output might be: SELECT * FROM products ORDER BY price ASC LIMIT 1;
32
+ SQL Query that was executed:
33
+ #{safe_query}
44
34
 
45
- SQL Query:
46
- PROMPT
47
- end
35
+ Query Result:
36
+ #{safe_response}
37
+
38
+ Please provide a concise answer based on the result as a human would, without any SQL or technical jargon.
39
+ For example:
40
+ - If the result is a list of users: "There are 5 users in the database."
41
+ - If the result is a single record: "The first user is John Doe."
42
+ - If the result is an aggregate: "The average age of users is 30 years."
43
+
44
+ Answer in the same language as the question.
45
+ PROMPT
46
+ end
47
+
48
+ def as_sql_generator(natural_language_query, schema_string)
49
+ safe_query = escape_input(natural_language_query)
50
+
51
+ <<~PROMPT
52
+ You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
53
+ Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
54
+
55
+ Database Schema:
56
+ ```sql
57
+ #{schema_string}
58
+ ```
59
+
60
+ Natural Language Query: "#{safe_query}"
61
+
62
+ Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
63
+ You should determine the appropriate table(s) to query from the schema and the natural language query.
64
+ Use JOINs when necessary to query data across multiple tables.
65
+
66
+ Examples:
67
+ - If the query is "show me all users", the output should be: SELECT * FROM users;
68
+ - If the query is "find the last 5 registered users", the output should be: SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
69
+ - If the query is "show me products with their categories", the output might be: SELECT products.*, categories.name as category_name FROM products JOIN categories ON products.category_id = categories.id;
70
+ - If the query is "which is the cheapest product", the output might be: SELECT * FROM products ORDER BY price ASC LIMIT 1;
71
+
72
+ SQL Query:
73
+ PROMPT
74
+ end
75
+
76
+ def as_sql_generator_for_model(natural_language_query, schema_string, table_name)
77
+ safe_query = escape_input(natural_language_query)
78
+ safe_table = escape_input(table_name.to_s)
79
+
80
+ <<~PROMPT
81
+ You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
82
+ Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
83
+ The query should be for the table: #{safe_table}.
84
+
85
+ Database Schema:
86
+ ```sql
87
+ #{schema_string}
88
+ ```
89
+
90
+ Natural Language Query: "#{safe_query}"
91
+
92
+ Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
93
+ For example, if the query is "show me all users", and the table is `users`, the output should be:
94
+ SELECT * FROM users;
95
+ If the query is "find the last 5 registered users", the output should be:
96
+ SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
97
+
98
+ SQL Query:
99
+ PROMPT
100
+ end
101
+
102
+ private
48
103
 
49
- def self.as_sql_generator_for_model(natural_language_query, schema_string, table_name)
50
- <<~PROMPT
51
- You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
52
- Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
53
- The query should be for the table: #{table_name}.
104
+ # Escapes user input to mitigate prompt injection attacks.
105
+ # Replaces known injection patterns with "[FILTERED]" and truncates the result to 2000 characters.
106
+ def escape_input(input)
107
+ return "" if input.nil?
54
108
 
55
- Database Schema:
56
- ```sql
57
- #{schema_string}
58
- ```
109
+ sanitized = input.to_s.dup
59
110
 
60
- Natural Language Query: "#{natural_language_query}"
111
+ # Remove known prompt injection patterns
112
+ PROMPT_INJECTION_PATTERNS.each do |pattern|
113
+ sanitized.gsub!(pattern, "[FILTERED]")
114
+ end
61
115
 
62
- Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
63
- For example, if the query is "show me all users", and the table is `users`, the output should be:
64
- SELECT * FROM users;
65
- If the query is "find the last 5 registered users", the output should be:
66
- SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
116
+ # Limit input length to prevent token abuse
117
+ sanitized = sanitized[0, 2000] if sanitized.length > 2000
67
118
 
68
- SQL Query:
69
- PROMPT
119
+ sanitized
120
+ end
70
121
  end
71
122
  end
72
123
  end