boxcars 0.4.5 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 10f7d544a53712e622028ebb6dc8e52c9dec9497f6e4436ef0ab456b27885c1e
4
- data.tar.gz: 44e91be763215a67dec30899f9155b59b43397c225afe3a7e5677b888bc39056
3
+ metadata.gz: d6979baf9aa7c8dfb0c4f852b72ec597d92339531aa9d61055530ca73065965e
4
+ data.tar.gz: 8fdb73c699d0524d64a5f340adfd25f893402739e8598c16e8a5f1c402569d56
5
5
  SHA512:
6
- metadata.gz: ac6c9bd9ff37d2c7ead36371f26f968fdce01ee5f5cfdcc1c9f876d6a879680c10faef74a285ee0fc1e8a1c23656ebe7c650be7d6d04370884566cc09942aa32
7
- data.tar.gz: 58aaf3c88912de6cd2b6746a7a2b990f05227c9c9369e4fd3b64811f5ed479dc86608c7a60d8f9ce80cd1d26daefa565a6a3d925091190b4d36acfef4b344fc0
6
+ metadata.gz: c595f68c3e29cefe105a93e9e95889d8448d66229f03ae3ca3ddfa3bbb62325dcaea240929344c09b3b6e8911068c06d9d0367a07b78f574e2854019ac0291d0
7
+ data.tar.gz: 34a2ef8446c0c05179fd51de46ca6fe31d0629aed3115bef25d31f6e6285913315918f4872e2f06af7fbda6b07beaf0da1fd940a20d4a94f5a526a85b92f2204
@@ -43,7 +43,10 @@ module Boxcars
43
43
  # @param engine_output [String] The output from the engine.
44
44
  # @return [Result] The result.
45
45
  def get_answer(engine_output)
46
- extract_answer(JSON.parse(engine_output))
46
+ # sometimes the LLM adds text in front of the JSON output, so let's strip it here
47
+ json_start = engine_output.index("{")
48
+ json_end = engine_output.rindex("}")
49
+ extract_answer(JSON.parse(engine_output[json_start..json_end]))
47
50
  rescue StandardError => e
48
51
  Result.from_error("Error: #{e.message}:\n#{engine_output}")
49
52
  end
@@ -138,13 +138,6 @@ module Boxcars
138
138
  end
139
139
  # rubocop:enable Metrics/AbcSize
140
140
 
141
- # the identifying parameters for the engine
142
- def identifying_params
143
- params = { model_name: model_name }
144
- params.merge!(default_params)
145
- params
146
- end
147
-
148
141
  # the engine type
149
142
  def engine_type
150
143
  "claude"
@@ -83,13 +83,6 @@ module Boxcars
83
83
  answer
84
84
  end
85
85
 
86
- # Build extra kwargs from additional params that were passed in.
87
- # @param values [Hash] The values to build extra kwargs from.
88
- def build_extra(values:)
89
- values[:model_kw_args] = @open_ai_params.merge(values)
90
- values
91
- end
92
-
93
86
  # Get the default parameters for the engine.
94
87
  def default_params
95
88
  open_ai_params
@@ -163,13 +156,6 @@ module Boxcars
163
156
  # rubocop:enable Metrics/AbcSize
164
157
  end
165
158
 
166
- # the identifying parameters for the engine
167
- def identifying_params
168
- params = { model_name: model_name }
169
- params.merge!(default_params)
170
- params
171
- end
172
-
173
159
  # the engine type
174
160
  def engine_type
175
161
  "openai"
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ # An engine that uses PerplexityAI's API.
6
+ class Perplexityai < Engine
7
+ attr_reader :prompts, :perplexity_params, :model_kwargs, :batch_size
8
+
9
+ # The default parameters to use when asking the engine.
10
+ DEFAULT_PER_PARAMS = {
11
+ model: "llama-2-70b-chat",
12
+ temperature: 0.1,
13
+ max_tokens: 3200
14
+ }.freeze
15
+
16
+ # the default name of the engine
17
+ DEFAULT_PER_NAME = "PerplexityAI engine"
18
+ # the default description of the engine
19
+ DEFAULT_PER_DESCRIPTION = "useful for when you need to use AI to answer questions. " \
20
+ "You should ask targeted questions"
21
+
22
+ # An engine is a container for a single tool to run.
23
+ # @param name [String] The name of the engine. Defaults to "PerplexityAI engine".
24
+ # @param description [String] A description of the engine. Defaults to:
25
+ # useful for when you need to use AI to answer questions. You should ask targeted questions".
26
+ # @param prompts [Array<String>] The prompts to use when asking the engine. Defaults to [].
27
+ # @param batch_size [Integer] The number of prompts to send to the engine at once. Defaults to 20.
28
+ def initialize(name: DEFAULT_PER_NAME, description: DEFAULT_PER_DESCRIPTION, prompts: [], batch_size: 20, **kwargs)
29
+ @perplexity_params = DEFAULT_PER_PARAMS.merge(kwargs)
30
+ @prompts = prompts
31
+ @batch_size = batch_size
32
+ super(description: description, name: name)
33
+ end
34
+
35
+ def conversation_model?(model)
36
+ ["mistral-7b-instruct", "llama-2-13b-chat", "llama-2-70b-chat", "openhermes-2-mistral-7b"].include?(model)
37
+ end
38
+
39
+ def chat(parameters:)
40
+ url = URI("https://api.perplexity.ai/chat/completions")
41
+
42
+ http = Net::HTTP.new(url.host, url.port)
43
+ http.use_ssl = true
44
+
45
+ request = Net::HTTP::Post.new(url)
46
+ request["accept"] = 'application/json'
47
+ request["authorization"] = "Bearer #{ENV.fetch('PERPLEXITY_API_KEY')}"
48
+ request["content-type"] = 'application/json'
49
+ the_body = {
50
+ model: (parameters[:model] || "mistral-7b-instruct"),
51
+ messages: parameters[:messages]
52
+ }
53
+ request.body = the_body.to_json
54
+
55
+ response = http.request(request)
56
+ JSON.parse(response.read_body)
57
+ end
58
+
59
+ # Get an answer from the engine.
60
+ # @param prompt [String] The prompt to use when asking the engine.
61
+ # @param openai_access_token [String] The access token to use when asking the engine.
62
+ # Defaults to Boxcars.configuration.openai_access_token.
63
+ # @param kwargs [Hash] Additional parameters to pass to the engine if wanted.
64
+ def client(prompt:, inputs: {}, **kwargs)
65
+ prompt = prompt.first if prompt.is_a?(Array)
66
+ params = prompt.as_messages(inputs).merge(default_params).merge(kwargs)
67
+ params[:model] ||= "llama-2-70b-chat"
68
+ if Boxcars.configuration.log_prompts
69
+ Boxcars.debug(params[:messages].last(2).map { |p| ">>>>>> Role: #{p[:role]} <<<<<<\n#{p[:content]}" }.join("\n"), :cyan)
70
+ end
71
+ chat(parameters: params)
72
+ end
73
+
74
+ # get an answer from the engine for a question.
75
+ # @param question [String] The question to ask the engine.
76
+ # @param kwargs [Hash] Additional parameters to pass to the engine if wanted.
77
+ def run(question, **kwargs)
78
+ prompt = Prompt.new(template: question)
79
+ response = client(prompt: prompt, **kwargs)
80
+ raise Error, "PerplexityAI: No response from API" unless response
81
+ raise Error, "PerplexityAI: #{response['error']}" if response["error"]
82
+
83
+ answer = response["choices"].map { |c| c.dig("message", "content") || c["text"] }.join("\n").strip
84
+ puts answer
85
+ answer
86
+ end
87
+
88
+ # Get the default parameters for the engine.
89
+ def default_params
90
+ perplexity_params
91
+ end
92
+
93
+ # Get generation information
94
+ # @param sub_choices [Array<Hash>] The choices to get generation info for.
95
+ # @return [Array<Generation>] The generation information.
96
+ def generation_info(sub_choices)
97
+ sub_choices.map do |choice|
98
+ Generation.new(
99
+ text: choice.dig("message", "content") || choice["text"],
100
+ generation_info: {
101
+ finish_reason: choice.fetch("finish_reason", nil),
102
+ logprobs: choice.fetch("logprobs", nil)
103
+ }
104
+ )
105
+ end
106
+ end
107
+
108
+ # make sure we got a valid response
109
+ # @param response [Hash] The response to check.
110
+ # @param must_haves [Array<String>] The keys that must be in the response. Defaults to %w[choices].
111
+ # @raise [KeyError] if there is an issue with the access token.
112
+ # @raise [ValueError] if the response is not valid.
113
+ def check_response(response, must_haves: %w[choices])
114
+ if response['error']
115
+ code = response.dig('error', 'code')
116
+ msg = response.dig('error', 'message') || 'unknown error'
117
+ raise KeyError, "PERPLEXITY_API_KEY not valid" if code == 'invalid_api_key'
118
+
119
+ raise ValueError, "PerplexityAI error: #{msg}"
120
+ end
121
+
122
+ must_haves.each do |key|
123
+ raise ValueError, "Expecting key #{key} in response" unless response.key?(key)
124
+ end
125
+ end
126
+
127
+ # Call out to PerplexityAI's endpoint with k unique prompts.
128
+ # @param prompts [Array<String>] The prompts to pass into the model.
129
+ # @param inputs [Array<String>] The inputs to substitute into the prompt.
130
+ # @param stop [Array<String>] Optional list of stop words to use when generating.
131
+ # @return [EngineResult] The full engine output.
132
+ def generate(prompts:, stop: nil)
133
+ params = {}
134
+ params[:stop] = stop if stop
135
+ choices = []
136
+ token_usage = {}
137
+ # Get the token usage from the response.
138
+ # Includes prompt, completion, and total tokens used.
139
+ inkeys = %w[completion_tokens prompt_tokens total_tokens].freeze
140
+ prompts.each_slice(batch_size) do |sub_prompts|
141
+ sub_prompts.each do |sprompts, inputs|
142
+ response = client(prompt: sprompts, inputs: inputs, **params)
143
+ check_response(response)
144
+ choices.concat(response["choices"])
145
+ usage_keys = inkeys & response["usage"].keys
146
+ usage_keys.each { |key| token_usage[key] = token_usage[key].to_i + response["usage"][key] }
147
+ end
148
+ end
149
+
150
+ n = params.fetch(:n, 1)
151
+ generations = []
152
+ prompts.each_with_index do |_prompt, i|
153
+ sub_choices = choices[i * n, (i + 1) * n]
154
+ generations.push(generation_info(sub_choices))
155
+ end
156
+ EngineResult.new(generations: generations, engine_output: { token_usage: token_usage })
157
+ end
158
+ # rubocop:enable Metrics/AbcSize
159
+ end
160
+
161
+ # the engine type
162
+ def engine_type
163
+ "perplexityai"
164
+ end
165
+
166
+ # calculate the number of tokens used
167
+ def get_num_tokens(text:)
168
+ text.split.length # TODO: hook up to token counting gem
169
+ end
170
+
171
+ # lookup the context size for a model by name
172
+ # @param modelname [String] The name of the model to lookup.
173
+ def modelname_to_contextsize(modelname)
174
+ model_lookup = {
175
+ 'text-davinci-003': 4097,
176
+ 'text-curie-001': 2048,
177
+ 'text-babbage-001': 2048,
178
+ 'text-ada-001': 2048,
179
+ 'code-davinci-002': 8000,
180
+ 'code-cushman-001': 2048,
181
+ 'gpt-3.5-turbo-1': 4096
182
+ }.freeze
183
+ model_lookup[modelname] || 4097
184
+ end
185
+
186
+ # Calculate the maximum number of tokens possible to generate for a prompt.
187
+ # @param prompt_text [String] The prompt text to use.
188
+ # @return [Integer] the number of tokens possible to generate.
189
+ def max_tokens_for_prompt(prompt_text)
190
+ num_tokens = get_num_tokens(prompt_text)
191
+
192
+ # get max context size for model by name
193
+ max_size = modelname_to_contextsize(model_name)
194
+ max_size - num_tokens
195
+ end
196
+ end
@@ -22,4 +22,5 @@ end
22
22
  require "boxcars/engine/engine_result"
23
23
  require "boxcars/engine/anthropic"
24
24
  require "boxcars/engine/openai"
25
+ require "boxcars/engine/perplexityai"
25
26
  require "boxcars/engine/gpt4all_eng"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.4.5"
5
+ VERSION = "0.4.6"
6
6
  end
@@ -29,6 +29,7 @@ module Boxcars
29
29
 
30
30
  def self.from_xml(xml)
31
31
  xml = xml[xml.index("<")..-1] unless xml.start_with?("<")
32
+ xml = xml[0..xml.rindex(">")] unless xml.end_with?(">")
32
33
  doc = Nokogiri::XML.parse(xml)
33
34
  if doc.errors.any?
34
35
  Boxcars.debug("XML: #{xml}", :yellow)
@@ -0,0 +1,28 @@
1
+ require "debug"
2
+ require "dotenv/load"
3
+ require "boxcars"
4
+
5
+ # Boxcars.configuration.logger = Logger.new($stdout)
6
+
7
+ eng = Boxcars::Perplexityai.new
8
+ # eng = Boxcars::Openai.new(model: "gpt-4")
9
+ ctemplate = [
10
+ Boxcars::Boxcar.syst("The user will type in a city name. Your job is to evaluate if the given city is a good place to live. " \
11
+ "Build a comprehensive report about livability, weather, cost of living, crime rate, drivability, " \
12
+ "walkability, and bike ability, and direct flights. In the final answer, for the first paragraph, " \
13
+ "summarize the pros and cons of living in the city followed by the background information and links " \
14
+ "for the research. Finalize your answer with an overall grade from A to F on the city."),
15
+ Boxcars::Boxcar.user("%<input>s")
16
+ ]
17
+ conv = Boxcars::Conversation.new(lines: ctemplate)
18
+
19
+ conversation_prompt = Boxcars::ConversationPrompt.new(conversation: conv, input_variables: [:input], other_inputs: [],
20
+ output_variables: [:answer])
21
+
22
+ boxcar = Boxcars::EngineBoxcar.new(engine: eng, name: "City Helper", prompt: conversation_prompt,
23
+ description: "Evaluate if a city is a good place to live.")
24
+ data = boxcar.run(ARGV.fetch(0, "San Francisco"))
25
+ # train = Boxcars.train.new(boxcars: [boxcar])
26
+ # data = train.run()
27
+ # debugger
28
+ puts data
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.5
4
+ version: 0.4.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2023-10-06 00:00:00.000000000 Z
12
+ date: 2023-11-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: anthropic
@@ -154,6 +154,7 @@ files:
154
154
  - lib/boxcars/engine/engine_result.rb
155
155
  - lib/boxcars/engine/gpt4all_eng.rb
156
156
  - lib/boxcars/engine/openai.rb
157
+ - lib/boxcars/engine/perplexityai.rb
157
158
  - lib/boxcars/generation.rb
158
159
  - lib/boxcars/observation.rb
159
160
  - lib/boxcars/prompt.rb
@@ -184,6 +185,7 @@ files:
184
185
  - lib/boxcars/vector_store/split_text.rb
185
186
  - lib/boxcars/version.rb
186
187
  - lib/boxcars/x_node.rb
188
+ - perplexity_example.rb
187
189
  - run.json
188
190
  homepage: https://github.com/BoxcarsAI/boxcars
189
191
  licenses: