RubyGems - langchainrb - Versions diffs - 0.5.7 → 0.6.0 - Mend

langchainrb 0.5.7 → 0.6.0

Files changed (22) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/Gemfile.lock +11 -8
data/README.md +4 -4
data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb +104 -0
data/lib/langchain/agent/base.rb +1 -1
data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} +5 -5
data/lib/langchain/data.rb +8 -0
data/lib/langchain/llm/ai21.rb +6 -2
data/lib/langchain/llm/cohere.rb +7 -3
data/lib/langchain/llm/google_palm.rb +3 -12
data/lib/langchain/loader.rb +41 -9
data/lib/langchain/output_parsers/base.rb +45 -0
data/lib/langchain/output_parsers/structured.rb +91 -0
data/lib/langchain/tool/base.rb +1 -1
data/lib/langchain/tool/database.rb +20 -10
data/lib/langchain/utils/token_length/ai21_validator.rb +36 -0
data/lib/langchain/utils/token_length/cohere_validator.rb +44 -0
data/lib/langchain/version.rb +1 -1
data/lib/langchain.rb +10 -3
metadata +29 -10
/data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ba5e9e8257d18c0940fdaf4fe84c03d594d8f1151e40e1bb35de059f8e6e5094
-  data.tar.gz: 11310635819502b9bfbd66bc45dc7aa1ce500d4a874dcc5ab550d6c5edf7194f
+  metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
+  data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
 SHA512:
-  metadata.gz: 4b97e21bcbc0c5f1d842271b64949c07d6d78190cd97c22fd0dab735d6b6ae2f2e6328ba2631dfc77ed0a5dd227573e3f84f064e8dd9332701848a798747ac9a
-  data.tar.gz: 267b2029de10acf45bb97a040d174102f666e048aaaf03ab76218cd5281574c1ae977ba8e975faf4b690e677611daba2fb0fc975801c0e41072f050ec2ac2e34
+  metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
+  data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,10 @@
 ## [Unreleased]
+## [0.6.0] - 2023-06-22
+- [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
+- Implement AI21 token validator
+- Add `Langchain::OutputParsers`
 ## [0.5.7] - 2023-06-19
 - Developer can modify models used when initializing `Langchain::LLM::*` clients
 - Improvements to the `SQLQueryAgent` and the database tool

data/Gemfile.lock CHANGED Viewed

@@ -1,9 +1,10 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.5.7)
+    langchainrb (0.6.0)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
+      json-schema (~> 4.0.0)
       tiktoken_ruby (~> 0.0.5)
 GEM
@@ -31,7 +32,7 @@ GEM
     addressable (2.8.4)
       public_suffix (>= 2.0.2, < 6.0)
     afm (0.2.2)
-    ai21 (0.2.0)
+    ai21 (0.2.1)
     ast (2.4.2)
     baran (0.1.6)
     builder (3.2.4)
@@ -41,7 +42,7 @@ GEM
       dry-monads (~> 1.6)
       ruby-next-core (>= 0.15.0)
     coderay (1.1.3)
-    cohere-ruby (0.9.4)
+    cohere-ruby (0.9.5)
       faraday (>= 1.0.0)
       faraday_middleware (>= 1.0.0)
     colorize (0.8.1)
@@ -124,7 +125,7 @@ GEM
     faraday-retry (1.0.3)
     faraday_middleware (1.2.0)
       faraday (~> 1.0)
-    google_palm_api (0.1.1)
+    google_palm_api (0.1.2)
       faraday (>= 1.0.0)
       faraday_middleware (>= 1.0.0)
     google_search_results (2.0.1)
@@ -148,6 +149,8 @@ GEM
       concurrent-ruby (~> 1.0)
     ice_nine (0.11.2)
     json (2.6.3)
+    json-schema (4.0.0)
+      addressable (>= 2.8)
     language_server-protocol (3.17.0.3)
     lint_roller (1.0.0)
     loofah (2.21.1)
@@ -219,7 +222,7 @@ GEM
       zeitwerk (~> 2.5)
     rainbow (3.1.1)
     rake (13.0.6)
-    rb_sys (0.9.78)
+    rb_sys (0.9.79)
     rdiscount (2.2.7)
     regexp_parser (2.8.0)
     replicate-ruby (0.2.2)
@@ -313,13 +316,13 @@ PLATFORMS
   x86_64-linux
 DEPENDENCIES
-  ai21 (~> 0.2.0)
+  ai21 (~> 0.2.1)
   chroma-db (~> 0.3.0)
-  cohere-ruby (~> 0.9.4)
+  cohere-ruby (~> 0.9.5)
   docx (~> 0.8.0)
   dotenv-rails (~> 2.7.6)
   eqn (~> 1.6.5)
-  google_palm_api (~> 0.1.1)
+  google_palm_api (~> 0.1.2)
   google_search_results (~> 2.0.0)
   hnswlib (~> 0.8.1)
   hugging-face (~> 0.3.4)

data/README.md CHANGED Viewed

@@ -155,13 +155,13 @@ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
 ```
 #### Google PaLM (Pathways Language Model)
-Add `"google_palm_api", "~> 0.1.1"` to your Gemfile.
+Add `"google_palm_api", "~> 0.1.2"` to your Gemfile.
 ```ruby
 google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
 ```
 #### AI21
-Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
+Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
 ```ruby
 ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
 ```
@@ -261,7 +261,7 @@ prompt.input_variables #=> ["adjective", "content"]
 ### Using Agents 🤖
 Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
-#### Chain-of-Thought Agent
+#### ReAct Agent
 Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
@@ -271,7 +271,7 @@ calculator = Langchain::Tool::Calculator.new
 openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
-agent = Langchain::Agent::ChainOfThoughtAgent.new(
+agent = Langchain::Agent::ReActAgent.new(
   llm: openai,
   tools: [search_tool, calculator]
 )

data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb ADDED Viewed

@@ -0,0 +1,104 @@
+require "langchain"
+# Generate a prompt that directs the LLM to provide a JSON response that adheres to a specific JSON schema.
+json_schema = {
+  type: "object",
+  properties: {
+    name: {
+      type: "string",
+      description: "Persons name"
+    },
+    age: {
+      type: "number",
+      description: "Persons age"
+    },
+    interests: {
+      type: "array",
+      items: {
+        type: "object",
+        properties: {
+          interest: {
+            type: "string",
+            description: "A topic of interest"
+          },
+          levelOfInterest: {
+            type: "number",
+            description: "A value between 0 and 100 of how interested the person is in this interest"
+          }
+        },
+        required: ["interest", "levelOfInterest"],
+        additionalProperties: false
+      },
+      minItems: 1,
+      maxItems: 3,
+      description: "A list of the person's interests"
+    }
+  },
+  required: ["name", "age", "interests"],
+  additionalProperties: false
+}
+parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
+prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
+# Generate details of a fictional character.
+# You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
+# "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
+# For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}, "required": ["foo"]}
+# would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
+# Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
+# Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
+# Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
+# ```json
+# {"type":"object","properties":{"name":{"type":"string","description":"Persons name"},"age":{"type":"number","description":"Persons age"},"interests":{"type":"array","items":{"type":"object","properties":{"interest":{"type":"string","description":"A topic of interest"},"levelOfInterest":{"type":"number","description":"A value between 0 and 100 of how interested the person is in this interest"}},"required":["interest","levelOfInterest"],"additionalProperties":false},"minItems":1,"maxItems":3,"description":"A list of the person's interests"}},"required":["name","age","interests"],"additionalProperties":false}
+# ```
+# Character description: Korean chemistry student
+# LLM example response:
+llm_example_response = <<~RESPONSE
+  Here is your character:
+  ```json
+  {
+    "name": "Kim Ji-hyun",
+    "age": 22,
+    "interests": [
+      {
+        "interest": "Organic Chemistry",
+        "levelOfInterest": 85
+      },
+      {
+        "interest": "Biochemistry",
+        "levelOfInterest": 70
+      },
+      {
+        "interest": "Analytical Chemistry",
+        "levelOfInterest": 60
+      }
+    ]
+  }
+  ```
+RESPONSE
+parser.parse(llm_example_response)
+# {
+#   "name" => "Kim Ji-hyun",
+#   "age" => 22,
+#   "interests" => [
+#     {
+#       "interest" => "Organic Chemistry",
+#       "levelOfInterest" => 85
+#     },
+#     {
+#       "interest" => "Biochemistry",
+#       "levelOfInterest" => 70
+#     },
+#     {
+#       "interest" => "Analytical Chemistry",
+#       "levelOfInterest" => 60
+#     }
+#   ]
+# }

data/lib/langchain/agent/base.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Langchain::Agent
   # Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
   #
   # Available:
-  # - {Langchain::Agent::ChainOfThoughtAgent}
+  # - {Langchain::Agent::ReActAgent}
   #
   # @abstract
   class Base

data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} RENAMED Viewed

@@ -1,11 +1,11 @@
 # frozen_string_literal: true
 module Langchain::Agent
-  # = Chain of Thought Agent
+  # = ReAct Agent
   #
   #     llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
   #
-  #     agent = Langchain::Agent::ChainOfThoughtAgent.new(
+  #     agent = Langchain::Agent::ReActAgent.new(
   #       llm: llm,
   #       tools: ["google_search", "calculator", "wikipedia"]
   #     )
@@ -15,7 +15,7 @@ module Langchain::Agent
   #
   #     agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
   #     #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
-  class ChainOfThoughtAgent < Base
+  class ReActAgent < Base
     attr_reader :llm, :tools, :max_iterations
     # Initializes the Agent
@@ -23,7 +23,7 @@ module Langchain::Agent
     # @param llm [Object] The LLM client to use
     # @param tools [Array] The tools to use
     # @param max_iterations [Integer] The maximum number of iterations to run
-    # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
+    # @return [ReActAgent] The Agent::ReActAgent instance
     def initialize(llm:, tools: [], max_iterations: 10)
       Langchain::Tool::Base.validate_tools!(tools: tools)
@@ -117,7 +117,7 @@ module Langchain::Agent
     # @return [PromptTemplate] PromptTemplate instance
     def prompt_template
       @template ||= Langchain::Prompt.load_from_path(
-        file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml")
+        file_path: Langchain.root.join("langchain/agent/react_agent/react_agent_prompt.yaml")
       )
     end

data/lib/langchain/data.rb CHANGED Viewed

@@ -1,18 +1,26 @@
 # frozen_string_literal: true
 module Langchain
+  # Abstraction for data loaded by a {Langchain::Loader}
   class Data
+    # URL or Path of the data source
+    # @return [String]
     attr_reader :source
+    # @param data [String] data that was loaded
+    # @option options [String] :source URL or Path of the data source
     def initialize(data, options = {})
       @source = options[:source]
       @data = data
     end
+    # @return [String]
     def value
       @data
     end
+    # @param opts [Hash] options passed to the chunker
+    # @return [Array<String>]
     def chunks(opts = {})
       Langchain::Chunker::Text.new(@data, **opts).chunks
     end

data/lib/langchain/llm/ai21.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Langchain::LLM
   # Wrapper around AI21 Studio APIs.
   #
   # Gem requirements:
-  #   gem "ai21", "~> 0.2.0"
+  #   gem "ai21", "~> 0.2.1"
   #
   # Usage:
   #     ai21 = Langchain::LLM::AI21.new(api_key:)
@@ -13,9 +13,11 @@ module Langchain::LLM
   class AI21 < Base
     DEFAULTS = {
       temperature: 0.0,
-      model: "j2-large"
+      model: "j2-ultra"
     }.freeze
+    LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
     def initialize(api_key:, default_options: {})
       depends_on "ai21"
       require "ai21"
@@ -34,6 +36,8 @@ module Langchain::LLM
     def complete(prompt:, **params)
       parameters = complete_parameters params
+      parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], client)
       response = client.complete(prompt, parameters)
       response.dig(:completions, 0, :data, :text)
     end

data/lib/langchain/llm/cohere.rb CHANGED Viewed

@@ -13,9 +13,10 @@ module Langchain::LLM
   class Cohere < Base
     DEFAULTS = {
       temperature: 0.0,
-      completion_model_name: "base",
+      completion_model_name: "command",
       embeddings_model_name: "small",
-      dimension: 1024
+      dimension: 1024,
+      truncate: "START"
     }.freeze
     def initialize(api_key:, default_options: {})
@@ -51,7 +52,8 @@ module Langchain::LLM
       default_params = {
         prompt: prompt,
         temperature: @defaults[:temperature],
-        model: @defaults[:completion_model_name]
+        model: @defaults[:completion_model_name],
+        truncate: @defaults[:truncate]
       }
       if params[:stop_sequences]
@@ -60,6 +62,8 @@ module Langchain::LLM
       default_params.merge!(params)
+      default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
       response = client.generate(**default_params)
       response.dig("generations").first.dig("text")
     end

data/lib/langchain/llm/google_palm.rb CHANGED Viewed

@@ -5,21 +5,12 @@ module Langchain::LLM
   # Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
   #
   # Gem requirements:
-  #     gem "google_palm_api", "~> 0.1.0"
+  #     gem "google_palm_api", "~> 0.1.2"
   #
   # Usage:
   #     google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
   #
   class GooglePalm < Base
-    #
-    # Wrapper around the Google PaLM (Pathways Language Model) APIs.
-    #
-    # Gem requirements: gem "google_palm_api", "~> 0.1.1"
-    #
-    # Usage:
-    # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
-    #
     DEFAULTS = {
       temperature: 0.0,
       dimension: 768, # This is what the `embedding-gecko-001` model generates
@@ -61,7 +52,7 @@ module Langchain::LLM
       default_params = {
         prompt: prompt,
         temperature: @defaults[:temperature],
-        completion_model_name: @defaults[:completion_model_name]
+        model: @defaults[:completion_model_name]
       }
       if params[:stop_sequences]
@@ -91,7 +82,7 @@ module Langchain::LLM
       default_params = {
         temperature: @defaults[:temperature],
-        chat_completion_model_name: @defaults[:chat_completion_model_name],
+        model: @defaults[:chat_completion_model_name],
         context: context,
         messages: compose_chat_messages(prompt: prompt, messages: messages),
         examples: compose_examples(examples)

data/lib/langchain/loader.rb CHANGED Viewed

@@ -10,32 +10,64 @@ module Langchain
     URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
-    # Load data from a file or url
-    # Equivalent to Langchain::Loader.new(path).load
-    # @param path [String | Pathname] path to file or url
-    # @return [String] file content
+    # Load data from a file or URL. Shorthand for `Langchain::Loader.new(path).load`
+    #
+    # == Examples
+    #
+    #     # load a URL
+    #     data = Langchain::Loader.load("https://example.com/docs/README.md")
+    #
+    #     # load a file
+    #     data = Langchain::Loader.load("README.md")
+    #
+    #    # Load data using a custom processor
+    #    data = Langchain::Loader.load("README.md") do |raw_data, options|
+    #      # your processing code goes here
+    #      # return data at the end here
+    #    end
+    #
+    # @param path [String | Pathname] path to file or URL
+    # @param options [Hash] options passed to the processor class used to process the data
+    # @return [Data] data loaded from path
     def self.load(path, options = {}, &block)
       new(path, options).load(&block)
     end
     # Initialize Langchain::Loader
-    # @param path [String | Pathname] path to file or url
+    # @param path [String | Pathname] path to file or URL
+    # @param options [Hash] options passed to the processor class used to process the data
     # @return [Langchain::Loader] loader instance
     def initialize(path, options = {})
       @options = options
       @path = path
     end
-    # Check if path is url
-    # @return [Boolean] true if path is url
+    # Is the path a URL?
+    #
+    # @return [Boolean] true if path is URL
     def url?
       return false if @path.is_a?(Pathname)
       !!(@path =~ URI_REGEX)
     end
-    # Load data from a file or url
-    # @return [String] file content
+    # Load data from a file or URL
+    #
+    #    loader = Langchain::Loader.new("README.md")
+    #    # Load data using default processor for the file
+    #    loader.load
+    #
+    #    # Load data using a custom processor
+    #    loader.load do |raw_data, options|
+    #      # your processing code goes here
+    #      # return data at the end here
+    #    end
+    #
+    # @yield [String, Hash] handle parsing raw output into string directly
+    # @yieldparam [String] raw_data from the loaded URL or file
+    # @yieldreturn [String] parsed data, as a String
+    #
+    # @return [Data] data that was loaded
     def load(&block)
       @raw_data = url? ? load_from_url : load_from_path

data/lib/langchain/output_parsers/base.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+module Langchain::OutputParsers
+  # Abstract base class for output parsers that process the output of an LLM call.
+  #
+  # @abstract
+  class Base
+    #
+    # Parse the output of an LLM call.
+    #
+    # @param text LLM output to parse.
+    # @return Parsed output.
+    #
+    def parse(text:)
+      raise NotImplementedError
+    end
+    #
+    # Return a string describing the format of the output.
+    #
+    # @return Format instructions.
+    # @note Takes no arguments; this base implementation raises NotImplementedError.
+    # @example
+    # ```json
+    # {
+    #  "foo": "bar"
+    # }
+    # ```
+    #
+    def get_format_instructions
+      raise NotImplementedError
+    end
+  end
+  class OutputParserException < StandardError
+    def initialize(message, text)
+      @message = message
+      @text = text
+    end
+    def to_s
+      "#{@message}\nText: #{@text}"
+    end
+  end
+end

data/lib/langchain/output_parsers/structured.rb ADDED Viewed

@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+require "json"
+require "json-schema"
+module Langchain::OutputParsers
+  # = Structured Output Parser
+  #
+  class StructuredOutputParser < Base
+    attr_reader :schema
+    #
+    # Initializes a new instance of the class.
+    #
+    # @param schema [JSON::Schema] The json schema
+    #
+    def initialize(schema:)
+      @schema = validate_schema!(schema)
+    end
+    def to_h
+      {
+        _type: "StructuredOutputParser",
+        schema: schema.to_json
+      }
+    end
+    #
+    # Creates a new instance of the class using the given JSON::Schema.
+    #
+    # @param schema [JSON::Schema] The JSON::Schema to use
+    #
+    # @return [Object] A new instance of the class
+    #
+    def self.from_json_schema(schema)
+      new(schema: schema)
+    end
+    #
+    # Returns a string containing instructions for how the output of a language model should be formatted
+    # according to the @schema.
+    #
+    # @return [String] Instructions for how the output of a language model should be formatted
+    # according to the @schema.
+    #
+    def get_format_instructions
+      <<~INSTRUCTIONS
+        You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
+        "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
+        For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
+        would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
+        Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}}} is not well-formatted.
+        Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
+        Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
+        ```json
+        #{schema.to_json}
+        ```
+      INSTRUCTIONS
+    end
+    #
+    # Parse the output of an LLM call extracting an object that abides by the @schema
+    #
+    # @param text [String] Text output from the LLM call
+    #
+    # @return [Object] object that abides by the @schema
+    #
+    def parse(text)
+      json = text.include?("```") ? text.strip.split(/```(?:json)?/)[1] : text.strip
+      parsed = JSON.parse(json)
+      JSON::Validator.validate!(schema, parsed)
+      parsed
+    rescue => e
+      raise OutputParserException.new("Failed to parse. Text: \"#{text}\". Error: #{e}", text)
+    end
+    private
+    def validate_schema!(schema)
+      errors = JSON::Validator.fully_validate_schema(schema)
+      unless errors.empty?
+        raise ArgumentError, "Invalid schema: \n#{errors.join("\n")}"
+      end
+      schema
+    end
+  end
+end

data/lib/langchain/tool/base.rb CHANGED Viewed

@@ -27,7 +27,7 @@ module Langchain::Tool
   #
   # 3. Pass the tools when Agent is instantiated.
   #
-  #     agent = Langchain::Agent::ChainOfThoughtAgent.new(
+  #     agent = Langchain::Agent::ReActAgent.new(
   #       # or any other Langchain::LLM::Base implementation
   #       llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
   #       tools: ["google_search", "calculator", "wikipedia"]

data/lib/langchain/tool/database.rb CHANGED Viewed

@@ -14,7 +14,7 @@ module Langchain::Tool
       The input to this tool should be valid SQL.
     DESC
-    attr_reader :db, :requested_tables, :except_tables
+    attr_reader :db, :requested_tables, :excluded_tables
     #
     # Establish a database connection
@@ -25,16 +25,15 @@ module Langchain::Tool
     # @return [Database] Database object
     #
-    def initialize(connection_string:, tables: [], except_tables: [])
+    def initialize(connection_string:, tables: [], exclude_tables: [])
       depends_on "sequel"
       require "sequel"
-      require "sequel/extensions/schema_dumper"
       raise StandardError, "connection_string parameter cannot be blank" if connection_string.empty?
       @db = Sequel.connect(connection_string)
       @requested_tables = tables
-      @except_tables = except_tables
+      @excluded_tables = exclude_tables
     end
     #
@@ -46,20 +45,31 @@ module Langchain::Tool
       Langchain.logger.info("Dumping schema tables and keys", for: self.class)
       schema = ""
       db.tables.each do |table|
-        next if except_tables.include?(table)
+        next if excluded_tables.include?(table)
         next unless requested_tables.empty? || requested_tables.include?(table)
+        primary_key_columns = []
+        primary_key_column_count = db.schema(table).count { |column| column[1][:primary_key] == true }
         schema << "CREATE TABLE #{table}(\n"
         db.schema(table).each do |column|
           schema << "#{column[0]} #{column[1][:type]}"
-          schema << " PRIMARY KEY" if column[1][:primary_key] == true
-          schema << "," unless column == db.schema(table).last
-          schema << "\n"
+          if column[1][:primary_key] == true
+            schema << " PRIMARY KEY" if primary_key_column_count == 1
+          else
+            primary_key_columns << column[0]
+          end
+          schema << ",\n" unless column == db.schema(table).last && primary_key_column_count == 1
+        end
+        if primary_key_column_count > 1
+          schema << "PRIMARY KEY (#{primary_key_columns.join(",")})"
         end
-        schema << ");\n"
         db.foreign_key_list(table).each do |fk|
-          schema << "ALTER TABLE #{table} ADD FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]});\n"
+          schema << ",\n" if fk == db.foreign_key_list(table).first
+          schema << "FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]})"
+          schema << ",\n" unless fk == db.foreign_key_list(table).last
         end
+        schema << ");\n"
       end
       schema
     end

data/lib/langchain/utils/token_length/ai21_validator.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to AI21's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class AI21Validator < BaseValidator
+        TOKEN_LIMITS = {
+          "j2-ultra" => 8192,
+          "j2-mid" => 8192,
+          "j2-light" => 8192
+        }.freeze
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(text, model_name, client)
+          res = client.tokenize(text)
+          res.dig(:tokens).length
+        end
+        def self.token_limit(model_name)
+          TOKEN_LIMITS[model_name]
+        end
+      end
+    end
+  end
+end

data/lib/langchain/utils/token_length/cohere_validator.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to Cohere's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class CohereValidator < BaseValidator
+        TOKEN_LIMITS = {
+          # Source:
+          # https://docs.cohere.com/docs/models
+          "command-light" => 4096,
+          "command" => 4096,
+          "base-light" => 2048,
+          "base" => 2048,
+          "embed-english-light-v2.0" => 512,
+          "embed-english-v2.0" => 512,
+          "embed-multilingual-v2.0" => 256,
+          "summarize-medium" => 2048,
+          "summarize-xlarge" => 2048
+        }.freeze
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(text, model_name, client)
+          res = client.tokenize(text: text)
+          res["tokens"].length
+        end
+        def self.token_limit(model_name)
+          TOKEN_LIMITS[model_name]
+        end
+      end
+    end
+  end
+end

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.5.7"
+  VERSION = "0.6.0"
 end

data/lib/langchain.rb CHANGED Viewed

@@ -74,7 +74,7 @@ module Langchain
   module Agent
     autoload :Base, "langchain/agent/base"
-    autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
+    autoload :ReActAgent, "langchain/agent/react_agent/react_agent.rb"
     autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
   end
@@ -108,9 +108,11 @@ module Langchain
   module Utils
     module TokenLength
       autoload :BaseValidator, "langchain/utils/token_length/base_validator"
-      autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
-      autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
+      autoload :AI21Validator, "langchain/utils/token_length/ai21_validator"
+      autoload :CohereValidator, "langchain/utils/token_length/cohere_validator"
       autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
+      autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
+      autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
     end
   end
@@ -143,6 +145,11 @@ module Langchain
     autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
   end
+  module OutputParsers
+    autoload :Base, "langchain/output_parsers/base"
+    autoload :StructuredOutputParser, "langchain/output_parsers/structured"
+  end
   module Errors
     class BaseError < StandardError; end
   end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.5.7
+  version: 0.6.0
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-20 00:00:00.000000000 Z
+date: 2023-06-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -52,6 +52,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.0.5
+- !ruby/object:Gem::Dependency
+  name: json-schema
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.0.0
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
   requirement: !ruby/object:Gem::Requirement
@@ -114,14 +128,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.2.1
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: chroma-db
   requirement: !ruby/object:Gem::Requirement
@@ -142,14 +156,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.4
+        version: 0.9.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.4
+        version: 0.9.5
 - !ruby/object:Gem::Dependency
   name: docx
   requirement: !ruby/object:Gem::Requirement
@@ -184,14 +198,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.1
+        version: 0.1.2
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.1
+        version: 0.1.2
 - !ruby/object:Gem::Dependency
   name: google_search_results
   requirement: !ruby/object:Gem::Requirement
@@ -462,6 +476,7 @@ files:
 - Rakefile
 - examples/create_and_manage_few_shot_prompt_templates.rb
 - examples/create_and_manage_prompt_templates.rb
+- examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
 - examples/pdf_store_and_query_with_chroma.rb
 - examples/store_and_query_with_pinecone.rb
 - examples/store_and_query_with_qdrant.rb
@@ -469,8 +484,8 @@ files:
 - lefthook.yml
 - lib/langchain.rb
 - lib/langchain/agent/base.rb
-- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
-- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml
+- lib/langchain/agent/react_agent/react_agent.rb
+- lib/langchain/agent/react_agent/react_agent_prompt.yaml
 - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
 - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
 - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
@@ -489,6 +504,8 @@ files:
 - lib/langchain/llm/prompts/summarize_template.yaml
 - lib/langchain/llm/replicate.rb
 - lib/langchain/loader.rb
+- lib/langchain/output_parsers/base.rb
+- lib/langchain/output_parsers/structured.rb
 - lib/langchain/processors/base.rb
 - lib/langchain/processors/csv.rb
 - lib/langchain/processors/docx.rb
@@ -509,7 +526,9 @@ files:
 - lib/langchain/tool/ruby_code_interpreter.rb
 - lib/langchain/tool/weather.rb
 - lib/langchain/tool/wikipedia.rb
+- lib/langchain/utils/token_length/ai21_validator.rb
 - lib/langchain/utils/token_length/base_validator.rb
+- lib/langchain/utils/token_length/cohere_validator.rb
 - lib/langchain/utils/token_length/google_palm_validator.rb
 - lib/langchain/utils/token_length/openai_validator.rb
 - lib/langchain/utils/token_length/token_limit_exceeded.rb

/data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} RENAMED Viewed

File without changes