RubyGems - asktive_record - Versions diffs - 0.1.6 → 0.2.0 - Mend

asktive_record 0.1.6 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20) hide show

checksums.yaml +4 -4
data/.rubocop.yml +19 -1
data/CHANGELOG.md +52 -1
data/README.md +196 -114
data/lib/asktive_record/adapters/base.rb +56 -0
data/lib/asktive_record/adapters/openai.rb +62 -0
data/lib/asktive_record/configuration.rb +37 -4
data/lib/asktive_record/llm_service.rb +33 -56
data/lib/asktive_record/log.rb +49 -0
data/lib/asktive_record/model.rb +7 -43
data/lib/asktive_record/prompt.rb +105 -54
data/lib/asktive_record/query.rb +37 -26
data/lib/asktive_record/schema_loader.rb +63 -0
data/lib/asktive_record/service.rb +4 -50
data/lib/asktive_record/sql_sanitizer.rb +92 -0
data/lib/asktive_record/version.rb +1 -1
data/lib/asktive_record.rb +36 -2
data/lib/generators/asktive_record/templates/asktive_record_initializer.rb +29 -6
data/sig/asktive_record.rbs +177 -1
metadata +18 -29

data/lib/asktive_record/adapters/openai.rb ADDED Viewed

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+require "openai"
+require "asktive_record/adapters/base"
+module AsktiveRecord
+  module Adapters
+    # OpenAI adapter for AsktiveRecord. Wraps the ruby-openai gem's ::OpenAI::Client to provide
+    # LLM communication capabilities. The client is created lazily on the first #chat call.
+    #
+    # @example
+    #   adapter = AsktiveRecord::Adapters::OpenAI.new(
+    #     api_key: ENV["OPENAI_API_KEY"],
+    #     model_name: "gpt-4o"
+    #   )
+    #   response = adapter.chat("Generate a SQL query...")
+    class OpenAI < Base
+      DEFAULT_MODEL = "gpt-4o-mini" # returned by #default_model_name
+      DEFAULT_TEMPERATURE = 0.2 # used when options has no :temperature key
+      DEFAULT_MAX_TOKENS = 250 # used when options has no :max_tokens key
+      def initialize(api_key:, model_name: nil)
+        super # bare super forwards api_key: and model_name: to Base#initialize
+        @client = nil # the API client is built on demand by #client
+      end
+      # Send a prompt to OpenAI as a single "user" message and return the text response.
+      # Errors raised by the gem (::OpenAI::Error) are re-raised as ApiError.
+      # @param prompt [String] the prompt to send
+      # @param options [Hash] additional options
+      # @option options [Float] :temperature (0.2) the temperature for response generation
+      # @option options [Integer] :max_tokens (250) the maximum tokens in the response
+      # @return [String, nil] the stripped content of the first choice, or nil when the response has none
+      def chat(prompt, options = {})
+        response = client.chat(
+          parameters: {
+            model: resolved_model_name, # not defined in this class; presumably inherited from Base (confirm)
+            messages: [{ role: "user", content: prompt }],
+            temperature: options.fetch(:temperature, DEFAULT_TEMPERATURE),
+            max_tokens: options.fetch(:max_tokens, DEFAULT_MAX_TOKENS)
+          }
+        )
+        response.dig("choices", 0, "message", "content")&.strip
+      rescue ::OpenAI::Error => e # translate gem-level failures into the library's own error class
+        raise ApiError, "OpenAI API error: #{e.message}"
+      end
+      # Returns the default model name for OpenAI (the DEFAULT_MODEL constant).
+      #
+      # @return [String]
+      def default_model_name
+        DEFAULT_MODEL
+      end
+      private
+      def client
+        @client ||= ::OpenAI::Client.new(access_token: api_key) # memoized; api_key is presumably a reader from Base (confirm)
+      end
+    end
+  end
+end

data/lib/asktive_record/configuration.rb CHANGED Viewed

@@ -2,17 +2,50 @@
 module AsktiveRecord
   # Configuration class for AsktiveRecord
-  # This class holds the configuration settings for the LLM provider, API key, model name
-  # and database schema path.
+  # This class holds the configuration settings for the LLM provider, API key, model name,
+  # database schema path, logging preferences, and adapter settings.
   class Configuration
-    attr_accessor :llm_provider, :llm_api_key, :llm_model_name, :db_schema_path, :skip_dump_schema
+    attr_accessor :llm_provider, :llm_api_key, :llm_model_name, :db_schema_path,
+                  :skip_dump_schema, :logger, :read_only, :adapter,
+                  :temperature, :max_tokens, :cache_enabled
     def initialize
       @llm_provider = :openai # Default LLM provider
       @llm_api_key = nil
-      @llm_model_name = "gpt-3.5-turbo" # Default model for OpenAI
+      @llm_model_name = "gpt-4o-mini" # Default model for OpenAI (gpt-3.5-turbo is deprecated)
       @db_schema_path = "db/schema.rb" # Default path for Rails schema file
       @skip_dump_schema = false # Default is to not skip schema dump
+      @logger = nil # Will use AsktiveRecord::Log default if nil
+      @read_only = true # Default to read-only mode (SELECT only)
+      @adapter = nil # Custom adapter; when nil, resolved_adapter builds one from llm_provider
+      @temperature = 0.2 # Default temperature for LLM
+      @max_tokens = 250 # Default max tokens for LLM response
+      @cache_enabled = false # Disabled by default
+    end
+    # Builds and returns the appropriate adapter based on configuration.
+    # If a custom adapter is set, returns it directly.
+    # Otherwise, builds one from llm_provider (raises ConfigurationError for an unsupported provider).
+    #
+    # @return [AsktiveRecord::Adapters::Base] the configured adapter
+    def resolved_adapter
+      return @adapter if @adapter # a custom adapter always takes precedence over llm_provider
+      build_adapter_from_provider # not stored in @adapter, so each call builds a new adapter
+    end
+    private
+    def build_adapter_from_provider
+      case llm_provider
+      when :openai
+        require "asktive_record/adapters/openai" # loaded lazily, only when this provider is selected
+        Adapters::OpenAI.new(api_key: llm_api_key, model_name: llm_model_name)
+      else
+        raise ConfigurationError,
+              "Unsupported LLM provider: #{llm_provider}. " \
+              "Supported providers: :openai. Or set a custom adapter via config.adapter."
+      end
     end
   end
 end

data/lib/asktive_record/llm_service.rb CHANGED Viewed

@@ -1,123 +1,100 @@
 # frozen_string_literal: true
-require "openai"
 require "asktive_record/prompt"
+require "asktive_record/log"
 module AsktiveRecord
   # Service class for interacting with the LLM API to generate SQL queries
   # and answer questions based on the generated queries and database responses.
+  # Uses the adapter pattern to support multiple LLM providers.
   class LlmService
     attr_reader :configuration
     def initialize(configuration)
       @configuration = configuration
+      @adapter = nil # resolved lazily by #adapter
       return if @configuration&.llm_api_key # NOTE(review): a key is required even when a custom config.adapter is set; confirm this is intended
       raise ConfigurationError,
-            "LLM API key is not configured. Please set it in config/initializers/asktive_record.rb\
- or via environment variable."
+            "LLM API key is not configured. Please set it in config/initializers/asktive_record.rb " \
+            "or via environment variable."
     end
-    # Placeholder for schema upload/management with the LLM if needed for more advanced scenarios
+    # Placeholder for schema upload/management with the LLM if needed for more advanced scenarios.
     # For instance, if using OpenAI Assistants API or fine-tuning.
     # For now, the schema is passed with each query.
     def upload_schema(_schema_string)
-      # This could be used to upload schema to a vector store or a fine-tuning dataset in the future.
-      puts "Schema upload functionality is a placeholder for now."
+      AsktiveRecord::Log.info("Schema upload functionality is a placeholder for now.")
       true
     end
     def answer(question, query, response)
-      puts "Answering question: #{question}"
-      puts "Generated SQL query: #{query}"
-      puts "Response from database: #{response.inspect}"
+      AsktiveRecord::Log.info("Answering question: #{question}")
+      AsktiveRecord::Log.debug("Generated SQL query: #{query}")
+      AsktiveRecord::Log.debug("Response from database: #{response.inspect}")
       answer_as_human(question, query, response)
     end
-    # Original method for model-specific queries
+    # Model-specific queries: builds a prompt scoped to table_name and returns the validated, cleaned SELECT statement.
     def generate_sql(natural_language_query, schema_string, table_name)
-      client = OpenAI::Client.new(access_token: configuration.llm_api_key)
       prompt = Prompt.as_sql_generator_for_model(
         natural_language_query,
         schema_string,
         table_name
       )
-      generate_and_validate_sql(client, prompt)
+      generate_and_validate_sql(prompt)
     end
-    # New method for service-class-based queries that can target any table
+    # Service-class-based queries that can target any table (_target_table is accepted but not used).
     def generate_sql_for_service(natural_language_query, schema_string, _target_table = "any")
-      client = OpenAI::Client.new(access_token: configuration.llm_api_key)
       prompt = Prompt.as_sql_generator(natural_language_query, schema_string)
-      generate_and_validate_sql(client, prompt)
+      generate_and_validate_sql(prompt)
     end
     private
     def answer_as_human(question, query, response)
       prompt = Prompt.as_human_answerer(question, query, response)
-      client = build_client
-      llm_response = call_llm(client, prompt)
-      extract_answer(llm_response)
-    rescue OpenAI::Error => e
-      raise ApiError, "OpenAI API error: #{e.message}"
+      adapter.chat(prompt, llm_options)
+    rescue ApiError # listed first so adapter API errors are not wrapped by the generic rescue below
+      raise
     rescue StandardError => e # NOTE(review): the message below says "SQL query" although this path produces the human-readable answer
       raise QueryGenerationError, "Failed to generate SQL query: #{e.message}"
     end
-    def build_client
-      OpenAI::Client.new(access_token: configuration.llm_api_key)
-    end
-    def call_llm(client, prompt)
-      client.chat(
-        parameters: {
-          model: configuration.llm_model_name || "gpt-3.5-turbo",
-          messages: [{ role: "user", content: prompt }],
-          temperature: 0.2,
-          max_tokens: 250
-        }
-      )
+    def adapter
+      @adapter ||= configuration.resolved_adapter # memoized for the lifetime of this service instance
     end
-    def extract_answer(response)
-      response.dig("choices", 0, "message", "content")&.strip
+    def llm_options
+      {
+        temperature: configuration.temperature || 0.2, # falls back when the setting is nil
+        max_tokens: configuration.max_tokens || 250 # falls back when the setting is nil
+      }
     end
-    def generate_and_validate_sql(client, prompt)
-      raw_sql = fetch_sql_from_llm(client, prompt)
+    def generate_and_validate_sql(prompt)
+      raw_sql = adapter.chat(prompt, llm_options)
       validate_sql_response!(raw_sql)
-      sanitize_sql(raw_sql)
-    rescue OpenAI::Error => e
-      raise ApiError, "OpenAI API error: #{e.message}"
+      clean_sql(raw_sql)
+    rescue ApiError # listed first so adapter API errors are not wrapped by the generic rescue below
+      raise
     rescue StandardError => e # NOTE(review): presumably also re-wraps the QueryGenerationError raised by validate_sql_response!
       raise QueryGenerationError, "Failed to generate SQL query: #{e.message}"
     end
-    def fetch_sql_from_llm(client, prompt)
-      response = client.chat(
-        parameters: {
-          model: configuration.llm_model_name || "gpt-3.5-turbo",
-          messages: [{ role: "user", content: prompt }],
-          temperature: 0.2,
-          max_tokens: 250
-        }
-      )
-      response.dig("choices", 0, "message", "content")&.strip
-    end
     def validate_sql_response!(raw_sql)
       raise QueryGenerationError, "LLM did not return a SQL query." if raw_sql.nil? || raw_sql.empty?
-      return if raw_sql.downcase.start_with?("select")
+      return if raw_sql.strip.match?(/\ASELECT\b/i) # case-insensitive; SELECT must be the first whole word
       raise QueryGenerationError, "LLM generated a non-SELECT query: #{raw_sql}"
     end
-    def sanitize_sql(sql)
-      sql.chomp(";")
+    def clean_sql(sql)
+      sql.gsub(/;\s*\z/, "").strip # drops one trailing semicolon (with any whitespace after it), then trims
     end
   end
 end

data/lib/asktive_record/log.rb ADDED Viewed

@@ -0,0 +1,49 @@
+# frozen_string_literal: true
+require "logger"
+module AsktiveRecord
+  # Provides a configurable logging interface for AsktiveRecord.
+  # Defaults to Rails.logger when available, or a standard Logger to $stdout at INFO level.
+  # Every message is prefixed with PREFIX before it is handed to the logger.
+  # Usage:
+  #   AsktiveRecord::Log.info("Something happened")
+  #   AsktiveRecord::Log.debug("Debug info")
+  module Log
+    PREFIX = "[AsktiveRecord]" # prepended to every message by the helpers below
+    class << self
+      def logger
+        @logger ||= default_logger # memoized; replace it with Log.logger = ...
+      end
+      attr_writer :logger
+      def info(message)
+        logger.info("#{PREFIX} #{message}")
+      end
+      def debug(message)
+        logger.debug("#{PREFIX} #{message}")
+      end
+      def warn(message)
+        logger.warn("#{PREFIX} #{message}")
+      end
+      def error(message)
+        logger.error("#{PREFIX} #{message}")
+      end
+      private
+      def default_logger
+        if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger # only use Rails when it exposes a non-nil logger
+          Rails.logger
+        else
+          ::Logger.new($stdout, level: ::Logger::INFO) # stdlib fallback; debug messages are dropped at this level
+        end
+      end
+    end
+  end
+end

data/lib/asktive_record/model.rb CHANGED Viewed

@@ -1,11 +1,15 @@
 # frozen_string_literal: true
 require "asktive_record/llm_service"
+require "asktive_record/schema_loader"
+require "asktive_record/log"
 module AsktiveRecord
   module Model
     # Provides class-level methods for AsktiveRecord models, enabling natural language queries and configuration checks.
     module ClassMethods
+      include AsktiveRecord::SchemaLoader
       def asktive_record
         return if AsktiveRecord.configuration
@@ -15,10 +19,10 @@ module AsktiveRecord
       end
       def ask(natural_language_query)
-        ensure_api_key_configured!
+        validate_llm_api_key!
-        schema_content = load_schema
-        raise ConfigurationError, "Schema content is empty." if schema_content.to_s.strip.empty?
+        schema_content = load_schema_content
+        ensure_schema_is_not_empty!(schema_content)
         llm_service = AsktiveRecord::LlmService.new(AsktiveRecord.configuration)
         current_table_name = respond_to?(:table_name) ? table_name : name.downcase.pluralize
@@ -27,46 +31,6 @@ module AsktiveRecord
         AsktiveRecord::Query.new(natural_language_query, raw_sql, self)
       end
-      private
-      def ensure_api_key_configured!
-        return if AsktiveRecord.configuration&.llm_api_key
-        raise ConfigurationError, "LLM API key is not configured for AsktiveRecord."
-      end
-      def load_schema
-        schema_path = AsktiveRecord.configuration.db_schema_path
-        return File.read(schema_path) if File.exist?(schema_path)
-        puts "Schema file not found at #{schema_path}. Attempting to generate it."
-        try_dump_schema(schema_path) || try_structure_sql || raise_schema_error(schema_path)
-      rescue SystemCallError => e
-        raise ConfigurationError, "Error reading schema file at #{schema_path}: #{e.message}"
-      end
-      def try_dump_schema(schema_path)
-        return unless defined?(Rails) && !AsktiveRecord.configuration.skip_dump_schema
-        system("bin/rails db:schema:dump")
-        File.exist?(schema_path) ? File.read(schema_path) : nil
-      end
-      def try_structure_sql
-        path = "db/structure.sql"
-        return unless File.exist?(path)
-        puts "Using schema from #{path}"
-        File.read(path)
-      end
-      def raise_schema_error(schema_path)
-        raise ConfigurationError, <<~MSG.strip
-          Database schema file not found at #{schema_path} or db/structure.sql.
-          Please run `asktive_record:setup` or configure the correct path.
-        MSG
-      end
     end
   end
 end

data/lib/asktive_record/prompt.rb CHANGED Viewed

@@ -1,72 +1,123 @@
 # frozen_string_literal: true
 module AsktiveRecord
-  # Prompt class for generating SQL queries from natural language questions
+  # Prompt class for generating SQL queries from natural language questions.
+  # All user inputs are escaped before interpolation to mitigate prompt injection.
   class Prompt
-    def self.as_human_answerer(question, query, response)
-      <<~PROMPT
-        Keep in mind the language of the question is in "#{question}".
-        If thre responses seems like an ActiveRerecord::Result because probably it was running as inspec
-        to be passed here, please convert it to a human-readable format. For example, get the @rows in the string
-        and convert it to a human-readable format.
-        Based on the provided schema, I ask about the following question:
-        "#{question}" and you give me the following SQL generated:
-        #{query}. So I executed the query and got the following result in my database:
-        #{response}.
-        Now I need you to answer the question based on what I asked you and the result I got.
-        Please provide a concise answer based on the result as a human would, without any SQL or technical jargon.
-        E.g. if the result is a list of users, you might say "There are 5 users in the database." or "The first user is John Doe." or "The average age of users is 30 years." depending on the context of the question.
-        Answer in the same language as the question was asked in "#{question}"
-      PROMPT
-    end
+    # Phrases commonly used in prompt-injection attempts; escape_input replaces each match (case-insensitive)
+    PROMPT_INJECTION_PATTERNS = [
+      /ignore\s+(all\s+)?previous\s+instructions/i,
+      /forget\s+(all\s+)?previous/i,
+      /disregard\s+(all\s+)?above/i,
+      /you\s+are\s+now/i,
+      /new\s+instructions?:/i,
+      /system\s*:/i
+    ].freeze
-    def self.as_sql_generator(natural_language_query, schema_string)
-      <<~PROMPT
-        You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
-        Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
+    class << self
+      def as_human_answerer(question, query, response)
+        safe_question = escape_input(question)
+        safe_query = escape_input(query.to_s)
+        safe_response = escape_input(response.to_s)
-        Database Schema:
-        ```sql
-        #{schema_string}
-        ```
+        <<~PROMPT
+          You are a helpful data assistant. Answer the user's question based on the SQL query result below.
+          Keep in mind the language of the question and answer in the same language.
-        Natural Language Query: "#{natural_language_query}"
+          If the response looks like an ActiveRecord::Result (with @rows), convert it to a human-readable format
+          by extracting the relevant data from the rows.
-        Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
-        You should determine the appropriate table(s) to query from the schema and the natural language query.
-        Use JOINs when necessary to query data across multiple tables.
+          Question: "#{safe_question}"
-        Examples:
-        - If the query is "show me all users", the output should be: SELECT * FROM users;
-        - If the query is "find the last 5 registered users", the output should be: SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
-        - If the query is "show me products with their categories", the output might be: SELECT products.*, categories.name as category_name FROM products JOIN categories ON products.category_id = categories.id;
-        - If the query is "which is the cheapest product", the output might be: SELECT * FROM products ORDER BY price ASC LIMIT 1;
+          SQL Query that was executed:
+          #{safe_query}
-        SQL Query:
-      PROMPT
-    end
+          Query Result:
+          #{safe_response}
+          Please provide a concise answer based on the result as a human would, without any SQL or technical jargon.
+          For example:
+          - If the result is a list of users: "There are 5 users in the database."
+          - If the result is a single record: "The first user is John Doe."
+          - If the result is an aggregate: "The average age of users is 30 years."
+          Answer in the same language as the question.
+        PROMPT
+      end
+      def as_sql_generator(natural_language_query, schema_string)
+        safe_query = escape_input(natural_language_query)
+        <<~PROMPT
+          You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
+          Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
+          Database Schema:
+          ```sql
+          #{schema_string}
+          ```
+          Natural Language Query: "#{safe_query}"
+          Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
+          You should determine the appropriate table(s) to query from the schema and the natural language query.
+          Use JOINs when necessary to query data across multiple tables.
+          Examples:
+          - If the query is "show me all users", the output should be: SELECT * FROM users;
+          - If the query is "find the last 5 registered users", the output should be: SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
+          - If the query is "show me products with their categories", the output might be: SELECT products.*, categories.name as category_name FROM products JOIN categories ON products.category_id = categories.id;
+          - If the query is "which is the cheapest product", the output might be: SELECT * FROM products ORDER BY price ASC LIMIT 1;
+          SQL Query:
+        PROMPT
+      end
+      def as_sql_generator_for_model(natural_language_query, schema_string, table_name)
+        safe_query = escape_input(natural_language_query)
+        safe_table = escape_input(table_name.to_s)
+        <<~PROMPT
+          You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
+          Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
+          The query should be for the table: #{safe_table}.
+          Database Schema:
+          ```sql
+          #{schema_string}
+          ```
+          Natural Language Query: "#{safe_query}"
+          Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
+          For example, if the query is "show me all users", and the table is `users`, the output should be:
+          SELECT * FROM users;
+          If the query is "find the last 5 registered users", the output should be:
+          SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
+          SQL Query:
+        PROMPT
+      end
+      private
-    def self.as_sql_generator_for_model(natural_language_query, schema_string, table_name)
-      <<~PROMPT
-        You are an expert SQL generator. Your task is to convert a natural language query into a SQL query for a database with the following schema.
-        Only generate SELECT queries. Do not generate any INSERT, UPDATE, DELETE, DROP, or other DDL/DML statements.
-        The query should be for the table: #{table_name}.
+      # Neutralizes user input before it is interpolated into a prompt, to mitigate prompt injection.
+      # Replaces known injection phrases with "[FILTERED]" and truncates to 2000 characters; nil becomes "".
+      def escape_input(input)
+        return "" if input.nil?
-        Database Schema:
-        ```sql
-        #{schema_string}
-        ```
+        sanitized = input.to_s.dup # work on a copy so gsub! below never mutates the caller's string
-        Natural Language Query: "#{natural_language_query}"
+        # Replace known prompt injection phrases with a visible marker
+        PROMPT_INJECTION_PATTERNS.each do |pattern|
+          sanitized.gsub!(pattern, "[FILTERED]")
+        end
-        Based on the schema and the natural language query, provide only the SQL query as a single line of text, without any explanation or surrounding text.
-        For example, if the query is "show me all users", and the table is `users`, the output should be:
-        SELECT * FROM users;
-        If the query is "find the last 5 registered users", the output should be:
-        SELECT * FROM users ORDER BY created_at DESC LIMIT 5;
+        # Limit input length to prevent token abuse (applied after filtering)
+        sanitized = sanitized[0, 2000] if sanitized.length > 2000
-        SQL Query:
-      PROMPT
+        sanitized # NOTE(review): double quotes are left as-is although callers embed this value inside "..."
+      end
     end
   end
 end
 end