RubyGems - langchainrb - Versions diffs - 0.5.7 → 0.6.1 - Mend

langchainrb 0.5.7 → 0.6.1

Files changed (25) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +8 -0
data/Gemfile.lock +11 -8
data/README.md +5 -5
data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb +104 -0
data/lib/langchain/active_record/hooks.rb +96 -0
data/lib/langchain/agent/base.rb +1 -1
data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} +5 -5
data/lib/langchain/data.rb +8 -0
data/lib/langchain/llm/ai21.rb +6 -2
data/lib/langchain/llm/cohere.rb +7 -3
data/lib/langchain/llm/google_palm.rb +3 -12
data/lib/langchain/loader.rb +41 -9
data/lib/langchain/output_parsers/base.rb +45 -0
data/lib/langchain/output_parsers/structured.rb +91 -0
data/lib/langchain/railtie.rb +11 -0
data/lib/langchain/tool/base.rb +1 -1
data/lib/langchain/tool/database.rb +20 -10
data/lib/langchain/utils/token_length/ai21_validator.rb +36 -0
data/lib/langchain/utils/token_length/cohere_validator.rb +44 -0
data/lib/langchain/vectorsearch/weaviate.rb +59 -17
data/lib/langchain/version.rb +1 -1
data/lib/langchain.rb +16 -3
metadata +31 -10
/data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: ba5e9e8257d18c0940fdaf4fe84c03d594d8f1151e40e1bb35de059f8e6e5094
-  data.tar.gz: 11310635819502b9bfbd66bc45dc7aa1ce500d4a874dcc5ab550d6c5edf7194f
+  metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
+  data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
 SHA512:
-  metadata.gz: 4b97e21bcbc0c5f1d842271b64949c07d6d78190cd97c22fd0dab735d6b6ae2f2e6328ba2631dfc77ed0a5dd227573e3f84f064e8dd9332701848a798747ac9a
-  data.tar.gz: 267b2029de10acf45bb97a040d174102f666e048aaaf03ab76218cd5281574c1ae977ba8e975faf4b690e677611daba2fb0fc975801c0e41072f050ec2ac2e34
+  metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
+  data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 ## [Unreleased]
+## [0.6.1] - 2023-06-24
+- Adding support to hook vectorsearch into ActiveRecord models
+## [0.6.0] - 2023-06-22
+- [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
+- Implement AI21 token validator
+- Add `Langchain::OutputParsers`
 ## [0.5.7] - 2023-06-19
 - Developer can modify models used when initializing `Langchain::LLM::*` clients
 - Improvements to the `SQLQueryAgent` and the database tool

data/Gemfile.lock CHANGED Viewed

@@ -1,9 +1,10 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.5.7)
+    langchainrb (0.6.1)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
+      json-schema (~> 4.0.0)
       tiktoken_ruby (~> 0.0.5)
 GEM
@@ -31,7 +32,7 @@ GEM
     addressable (2.8.4)
       public_suffix (>= 2.0.2, < 6.0)
     afm (0.2.2)
-    ai21 (0.2.0)
+    ai21 (0.2.1)
     ast (2.4.2)
     baran (0.1.6)
     builder (3.2.4)
@@ -41,7 +42,7 @@ GEM
       dry-monads (~> 1.6)
       ruby-next-core (>= 0.15.0)
     coderay (1.1.3)
-    cohere-ruby (0.9.4)
+    cohere-ruby (0.9.5)
       faraday (>= 1.0.0)
       faraday_middleware (>= 1.0.0)
     colorize (0.8.1)
@@ -124,7 +125,7 @@ GEM
     faraday-retry (1.0.3)
     faraday_middleware (1.2.0)
       faraday (~> 1.0)
-    google_palm_api (0.1.1)
+    google_palm_api (0.1.2)
       faraday (>= 1.0.0)
       faraday_middleware (>= 1.0.0)
     google_search_results (2.0.1)
@@ -148,6 +149,8 @@ GEM
       concurrent-ruby (~> 1.0)
     ice_nine (0.11.2)
     json (2.6.3)
+    json-schema (4.0.0)
+      addressable (>= 2.8)
     language_server-protocol (3.17.0.3)
     lint_roller (1.0.0)
     loofah (2.21.1)
@@ -219,7 +222,7 @@ GEM
       zeitwerk (~> 2.5)
     rainbow (3.1.1)
     rake (13.0.6)
-    rb_sys (0.9.78)
+    rb_sys (0.9.79)
     rdiscount (2.2.7)
     regexp_parser (2.8.0)
     replicate-ruby (0.2.2)
@@ -313,13 +316,13 @@ PLATFORMS
   x86_64-linux
 DEPENDENCIES
-  ai21 (~> 0.2.0)
+  ai21 (~> 0.2.1)
   chroma-db (~> 0.3.0)
-  cohere-ruby (~> 0.9.4)
+  cohere-ruby (~> 0.9.5)
   docx (~> 0.8.0)
   dotenv-rails (~> 2.7.6)
   eqn (~> 1.6.5)
-  google_palm_api (~> 0.1.1)
+  google_palm_api (~> 0.1.2)
   google_search_results (~> 2.0.0)
   hnswlib (~> 0.8.1)
   hugging-face (~> 0.3.4)

data/README.md CHANGED Viewed

@@ -41,7 +41,7 @@ require "langchain"
 | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
 | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
 | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
-| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | WIP               |
+| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP     | :white_check_mark: |
 ### Using Vector Search Databases 🔍
@@ -155,13 +155,13 @@ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
 ```
 #### Google PaLM (Pathways Language Model)
-Add `"google_palm_api", "~> 0.1.1"` to your Gemfile.
+Add `gem "google_palm_api", "~> 0.1.2"` to your Gemfile.
 ```ruby
 google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
 ```
 #### AI21
-Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
+Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
 ```ruby
 ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
 ```
@@ -261,7 +261,7 @@ prompt.input_variables #=> ["adjective", "content"]
 ### Using Agents 🤖
 Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
-#### Chain-of-Thought Agent
+#### ReAct Agent
 Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
@@ -271,7 +271,7 @@ calculator = Langchain::Tool::Calculator.new
 openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
-agent = Langchain::Agent::ChainOfThoughtAgent.new(
+agent = Langchain::Agent::ReActAgent.new(
   llm: openai,
   tools: [search_tool, calculator]
 )

data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb ADDED Viewed

@@ -0,0 +1,104 @@
+require "langchain"
+# Generate a prompt that directs the LLM to provide a JSON response that adheres to a specific JSON schema.
+json_schema = {
+  type: "object",
+  properties: {
+    name: {
+      type: "string",
+      description: "Persons name"
+    },
+    age: {
+      type: "number",
+      description: "Persons age"
+    },
+    interests: {
+      type: "array",
+      items: {
+        type: "object",
+        properties: {
+          interest: {
+            type: "string",
+            description: "A topic of interest"
+          },
+          levelOfInterest: {
+            type: "number",
+            description: "A value between 0 and 100 of how interested the person is in this interest"
+          }
+        },
+        required: ["interest", "levelOfInterest"],
+        additionalProperties: false
+      },
+      minItems: 1,
+      maxItems: 3,
+      description: "A list of the person's interests"
+    }
+  },
+  required: ["name", "age", "interests"],
+  additionalProperties: false
+}
+parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
+prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
+# Generate details of a fictional character.
+# You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
+# "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
+# For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}, "required": ["foo"]}
+# would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
+# Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
+# Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
+# Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
+# ```json
+# {"type":"object","properties":{"name":{"type":"string","description":"Persons name"},"age":{"type":"number","description":"Persons age"},"interests":{"type":"array","items":{"type":"object","properties":{"interest":{"type":"string","description":"A topic of interest"},"levelOfInterest":{"type":"number","description":"A value between 0 and 100 of how interested the person is in this interest"},"required":["interest","levelOfInterest"],"additionalProperties":false},"minItems":1,"maxItems":3,"description":"A list of the person's interests"},"required":["name","age","interests"],"additionalProperties":false}
+# ```
+# Character description: Korean chemistry student
+# LLM example response:
+llm_example_response = <<~RESPONSE
+  Here is your character:
+  ```json
+  {
+    "name": "Kim Ji-hyun",
+    "age": 22,
+    "interests": [
+      {
+        "interest": "Organic Chemistry",
+        "levelOfInterest": 85
+      },
+      {
+        "interest": "Biochemistry",
+        "levelOfInterest": 70
+      },
+      {
+        "interest": "Analytical Chemistry",
+        "levelOfInterest": 60
+      }
+    ]
+  }
+  ```
+RESPONSE
+parser.parse(llm_example_response)
+# {
+#   "name" => "Kim Ji-hyun",
+#   "age" => 22,
+#   "interests" => [
+#     {
+#       "interest" => "Organic Chemistry",
+#       "levelOfInterest" => 85
+#     },
+#     {
+#       "interest" => "Biochemistry",
+#       "levelOfInterest" => 70
+#     },
+#     {
+#       "interest" => "Analytical Chemistry",
+#       "levelOfInterest" => 60
+#     }
+#   ]
+# }

data/lib/langchain/active_record/hooks.rb ADDED Viewed

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+module Langchain
+  module ActiveRecord
+    # This module adds the following functionality to your ActiveRecord models:
+    # * `vectorsearch` class method to set the vector search provider
+    # * `similarity_search` class method to search for similar texts
+    # * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
+    #
+    # Usage:
+    #     class Recipe < ActiveRecord::Base
+    #       vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
+    #                    api_key: ENV["WEAVIATE_API_KEY"],
+    #                    url: ENV["WEAVIATE_URL"],
+    #                    index_name: "Recipes",
+    #                    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+    #                 )
+    #
+    #       after_save :upsert_to_vectorsearch
+    #
+    #       # Overwriting how the model is serialized before it's indexed
+    #       def as_vector
+    #         [
+    #           "Title: #{title}",
+    #           "Description: #{description}",
+    #           ...
+    #         ]
+    #         .compact
+    #         .join("\n")
+    #       end
+    #     end
+    #
+    # Create the default schema
+    #     Recipe.class_variable_get(:@@provider).create_default_schema
+    # Query the vector search provider
+    #     Recipe.similarity_search("carnivore dish")
+    # Delete the default schema to start over
+    #     Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
+    #
+    module Hooks
+      def self.included(base)
+        base.extend ClassMethods
+      end
+      # Index the text to the vector search provider
+      # You'd typically call this method in an ActiveRecord callback
+      #
+      # @return [Boolean] true
+      # @raise [Error] Indexing to vector search DB failed
+      def upsert_to_vectorsearch
+        if previously_new_record?
+          self.class.class_variable_get(:@@provider).add_texts(
+            texts: [as_vector],
+            ids: [id]
+          )
+        else
+          self.class.class_variable_get(:@@provider).update_texts(
+            texts: [as_vector],
+            ids: [id]
+          )
+        end
+      end
+      # Used to serialize the DB record to an indexable vector text
+      # Overwrite this method in your model to customize
+      #
+      # @return [String] the text representation of the model
+      def as_vector
+        to_json
+      end
+      module ClassMethods
+        # Set the vector search provider
+        #
+        # @param provider [Object] The `Langchain::Vectorsearch::*` instance
+        def vectorsearch(provider:)
+          class_variable_set(:@@provider, provider)
+        end
+        # Search for similar texts
+        #
+        # @param query [String] The query to search for
+        # @param k [Integer] The number of results to return
+        # @return [ActiveRecord::Relation] The ActiveRecord relation
+        def similarity_search(query, k: 1)
+          records = class_variable_get(:@@provider).similarity_search(
+            query: query,
+            k: k
+          )
+          ids = records.map { |record| record.dig("__id") }
+          where(id: ids)
+        end
+      end
+    end
+  end
+end

data/lib/langchain/agent/base.rb CHANGED Viewed

@@ -6,7 +6,7 @@ module Langchain::Agent
   # Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
   #
   # Available:
-  # - {Langchain::Agent::ChainOfThoughtAgent}
+  # - {Langchain::Agent::ReActAgent}
   #
   # @abstract
   class Base

data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} RENAMED Viewed

@@ -1,11 +1,11 @@
 # frozen_string_literal: true
 module Langchain::Agent
-  # = Chain of Thought Agent
+  # = ReAct Agent
   #
   #     llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
   #
-  #     agent = Langchain::Agent::ChainOfThoughtAgent.new(
+  #     agent = Langchain::Agent::ReActAgent.new(
   #       llm: llm,
   #       tools: ["google_search", "calculator", "wikipedia"]
   #     )
@@ -15,7 +15,7 @@ module Langchain::Agent
   #
   #     agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
   #     #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
-  class ChainOfThoughtAgent < Base
+  class ReActAgent < Base
     attr_reader :llm, :tools, :max_iterations
     # Initializes the Agent
@@ -23,7 +23,7 @@ module Langchain::Agent
     # @param llm [Object] The LLM client to use
     # @param tools [Array] The tools to use
     # @param max_iterations [Integer] The maximum number of iterations to run
-    # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
+    # @return [ReActAgent] The Agent::ReActAgent instance
     def initialize(llm:, tools: [], max_iterations: 10)
       Langchain::Tool::Base.validate_tools!(tools: tools)
@@ -117,7 +117,7 @@ module Langchain::Agent
     # @return [PromptTemplate] PromptTemplate instance
     def prompt_template
       @template ||= Langchain::Prompt.load_from_path(
-        file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml")
+        file_path: Langchain.root.join("langchain/agent/react_agent/react_agent_prompt.yaml")
       )
     end

data/lib/langchain/data.rb CHANGED Viewed

@@ -1,18 +1,26 @@
 # frozen_string_literal: true
 module Langchain
+  # Abstraction for data loaded by a {Langchain::Loader}
   class Data
+    # URL or Path of the data source
+    # @return [String]
     attr_reader :source
+    # @param data [String] data that was loaded
+    # @option options [String] :source URL or Path of the data source
     def initialize(data, options = {})
       @source = options[:source]
       @data = data
     end
+    # @return [String]
     def value
       @data
     end
+    # @param opts [Hash] options passed to the chunker
+    # @return [Array<String>]
     def chunks(opts = {})
       Langchain::Chunker::Text.new(@data, **opts).chunks
     end

data/lib/langchain/llm/ai21.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module Langchain::LLM
   # Wrapper around AI21 Studio APIs.
   #
   # Gem requirements:
-  #   gem "ai21", "~> 0.2.0"
+  #   gem "ai21", "~> 0.2.1"
   #
   # Usage:
   #     ai21 = Langchain::LLM::AI21.new(api_key:)
@@ -13,9 +13,11 @@ module Langchain::LLM
   class AI21 < Base
     DEFAULTS = {
       temperature: 0.0,
-      model: "j2-large"
+      model: "j2-ultra"
     }.freeze
+    LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
     def initialize(api_key:, default_options: {})
       depends_on "ai21"
       require "ai21"
@@ -34,6 +36,8 @@ module Langchain::LLM
     def complete(prompt:, **params)
       parameters = complete_parameters params
+      parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], client)
       response = client.complete(prompt, parameters)
       response.dig(:completions, 0, :data, :text)
     end

data/lib/langchain/llm/cohere.rb CHANGED Viewed

@@ -13,9 +13,10 @@ module Langchain::LLM
   class Cohere < Base
     DEFAULTS = {
       temperature: 0.0,
-      completion_model_name: "base",
+      completion_model_name: "command",
       embeddings_model_name: "small",
-      dimension: 1024
+      dimension: 1024,
+      truncate: "START"
     }.freeze
     def initialize(api_key:, default_options: {})
@@ -51,7 +52,8 @@ module Langchain::LLM
       default_params = {
         prompt: prompt,
         temperature: @defaults[:temperature],
-        model: @defaults[:completion_model_name]
+        model: @defaults[:completion_model_name],
+        truncate: @defaults[:truncate]
       }
       if params[:stop_sequences]
@@ -60,6 +62,8 @@ module Langchain::LLM
       default_params.merge!(params)
+      default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
       response = client.generate(**default_params)
       response.dig("generations").first.dig("text")
     end

data/lib/langchain/llm/google_palm.rb CHANGED Viewed

@@ -5,21 +5,12 @@ module Langchain::LLM
   # Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
   #
   # Gem requirements:
-  #     gem "google_palm_api", "~> 0.1.0"
+  #     gem "google_palm_api", "~> 0.1.2"
   #
   # Usage:
   #     google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
   #
   class GooglePalm < Base
-    #
-    # Wrapper around the Google PaLM (Pathways Language Model) APIs.
-    #
-    # Gem requirements: gem "google_palm_api", "~> 0.1.1"
-    #
-    # Usage:
-    # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
-    #
     DEFAULTS = {
       temperature: 0.0,
       dimension: 768, # This is what the `embedding-gecko-001` model generates
@@ -61,7 +52,7 @@ module Langchain::LLM
       default_params = {
         prompt: prompt,
         temperature: @defaults[:temperature],
-        completion_model_name: @defaults[:completion_model_name]
+        model: @defaults[:completion_model_name]
       }
       if params[:stop_sequences]
@@ -91,7 +82,7 @@ module Langchain::LLM
       default_params = {
         temperature: @defaults[:temperature],
-        chat_completion_model_name: @defaults[:chat_completion_model_name],
+        model: @defaults[:chat_completion_model_name],
         context: context,
         messages: compose_chat_messages(prompt: prompt, messages: messages),
         examples: compose_examples(examples)

data/lib/langchain/loader.rb CHANGED Viewed

@@ -10,32 +10,64 @@ module Langchain
     URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
-    # Load data from a file or url
-    # Equivalent to Langchain::Loader.new(path).load
-    # @param path [String | Pathname] path to file or url
-    # @return [String] file content
+    # Load data from a file or URL. Shorthand for `Langchain::Loader.new(path).load`
+    #
+    # == Examples
+    #
+    #     # load a URL
+    #     data = Langchain::Loader.load("https://example.com/docs/README.md")
+    #
+    #     # load a file
+    #     data = Langchain::Loader.load("README.md")
+    #
+    #     # Load data using a custom processor
+    #     data = Langchain::Loader.load("README.md") do |raw_data, options|
+    #       # your processing code goes here
+    #       # return data at the end here
+    #     end
+    #
+    # @param path [String | Pathname] path to file or URL
+    # @param options [Hash] options passed to the processor class used to process the data
+    # @return [Data] data loaded from path
     def self.load(path, options = {}, &block)
       new(path, options).load(&block)
     end
     # Initialize Langchain::Loader
-    # @param path [String | Pathname] path to file or url
+    # @param path [String | Pathname] path to file or URL
+    # @param options [Hash] options passed to the processor class used to process the data
     # @return [Langchain::Loader] loader instance
     def initialize(path, options = {})
       @options = options
       @path = path
     end
-    # Check if path is url
-    # @return [Boolean] true if path is url
+    # Is the path a URL?
+    #
+    # @return [Boolean] true if path is URL
     def url?
       return false if @path.is_a?(Pathname)
       !!(@path =~ URI_REGEX)
     end
-    # Load data from a file or url
-    # @return [String] file content
+    # Load data from a file or URL
+    #
+    #    loader = Langchain::Loader.new("README.md")
+    #    # Load data using default processor for the file
+    #    loader.load
+    #
+    #    # Load data using a custom processor
+    #    loader.load do |raw_data, options|
+    #      # your processing code goes here
+    #      # return data at the end here
+    #    end
+    #
+    # @yield [String, Hash] handle parsing raw output into string directly
+    # @yieldparam [String] raw_data from the loaded URL or file
+    # @yieldreturn [String] parsed data, as a String
+    #
+    # @return [Data] data that was loaded
     def load(&block)
       @raw_data = url? ? load_from_url : load_from_path

data/lib/langchain/output_parsers/base.rb ADDED Viewed

@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+module Langchain::OutputParsers
+  # Structured output parsers from the LLM.
+  #
+  # @abstract
+  class Base
+    #
+    # Parse the output of an LLM call.
+    #
+    # @param text [String] LLM output to parse.
+    # @return [Object] Parsed output.
+    #
+    def parse(text:)
+      raise NotImplementedError
+    end
+    #
+    # Return a string describing the format of the output.
+    #
+    # @return [String] Format instructions.
+    # @param options - Options for formatting instructions.
+    # @example
+    # ```json
+    # {
+    #  "foo": "bar"
+    # }
+    # ```
+    #
+    def get_format_instructions
+      raise NotImplementedError
+    end
+  end
+  class OutputParserException < StandardError
+    def initialize(message, text)
+      @message = message
+      @text = text
+    end
+    def to_s
+      "#{@message}\nText: #{@text}"
+    end
+  end
+end

data/lib/langchain/output_parsers/structured.rb ADDED Viewed

@@ -0,0 +1,91 @@
+# frozen_string_literal: true
+require "json"
+require "json-schema"
+module Langchain::OutputParsers
+  # = Structured Output Parser
+  #
+  class StructuredOutputParser < Base
+    attr_reader :schema
+    #
+    # Initializes a new instance of the class.
+    #
+    # @param schema [JSON::Schema] The json schema
+    #
+    def initialize(schema:)
+      @schema = validate_schema!(schema)
+    end
+    def to_h
+      {
+        _type: "StructuredOutputParser",
+        schema: schema.to_json
+      }
+    end
+    #
+    # Creates a new instance of the class using the given JSON::Schema.
+    #
+    # @param schema [JSON::Schema] The JSON::Schema to use
+    #
+    # @return [Object] A new instance of the class
+    #
+    def self.from_json_schema(schema)
+      new(schema: schema)
+    end
+    #
+    # Returns a string containing instructions for how the output of a language model should be formatted
+    # according to the @schema.
+    #
+    # @return [String] Instructions for how the output of a language model should be formatted
+    # according to the @schema.
+    #
+    def get_format_instructions
+      <<~INSTRUCTIONS
+        You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
+        "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
+        For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
+        would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
+        Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}}} is not well-formatted.
+        Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
+        Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
+        ```json
+        #{schema.to_json}
+        ```
+      INSTRUCTIONS
+    end
+    #
+    # Parse the output of an LLM call extracting an object that abides by the @schema
+    #
+    # @param text [String] Text output from the LLM call
+    #
+    # @return [Object] object that abides by the @schema
+    #
+    def parse(text)
+      json = text.include?("```") ? text.strip.split(/```(?:json)?/)[1] : text.strip
+      parsed = JSON.parse(json)
+      JSON::Validator.validate!(schema, parsed)
+      parsed
+    rescue => e
+      raise OutputParserException.new("Failed to parse. Text: \"#{text}\". Error: #{e}", text)
+    end
+    private
+    def validate_schema!(schema)
+      errors = JSON::Validator.fully_validate_schema(schema)
+      unless errors.empty?
+        raise ArgumentError, "Invalid schema: \n#{errors.join("\n")}"
+      end
+      schema
+    end
+  end
+end

data/lib/langchain/railtie.rb ADDED Viewed

@@ -0,0 +1,11 @@
+# frozen_string_literal: true
+module Langchain
+  class Railtie < Rails::Railtie
+    initializer "langchain" do
+      ActiveSupport.on_load(:active_record) do
+        ::ActiveRecord::Base.include Langchain::ActiveRecord::Hooks
+      end
+    end
+  end
+end

data/lib/langchain/tool/base.rb CHANGED Viewed

@@ -27,7 +27,7 @@ module Langchain::Tool
   #
   # 3. Pass the tools when Agent is instantiated.
   #
-  #     agent = Langchain::Agent::ChainOfThoughtAgent.new(
+  #     agent = Langchain::Agent::ReActAgent.new(
   #       llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
   #       llm_api_key: ENV["OPENAI_API_KEY"],
   #       tools: ["google_search", "calculator", "wikipedia"]

data/lib/langchain/tool/database.rb CHANGED Viewed

@@ -14,7 +14,7 @@ module Langchain::Tool
       The input to this tool should be valid SQL.
     DESC
-    attr_reader :db, :requested_tables, :except_tables
+    attr_reader :db, :requested_tables, :excluded_tables
     #
     # Establish a database connection
@@ -25,16 +25,15 @@ module Langchain::Tool
     # @return [Database] Database object
     #
-    def initialize(connection_string:, tables: [], except_tables: [])
+    def initialize(connection_string:, tables: [], exclude_tables: [])
       depends_on "sequel"
       require "sequel"
-      require "sequel/extensions/schema_dumper"
       raise StandardError, "connection_string parameter cannot be blank" if connection_string.empty?
       @db = Sequel.connect(connection_string)
       @requested_tables = tables
-      @except_tables = except_tables
+      @excluded_tables = exclude_tables
     end
     #
@@ -46,20 +45,31 @@ module Langchain::Tool
       Langchain.logger.info("Dumping schema tables and keys", for: self.class)
       schema = ""
       db.tables.each do |table|
-        next if except_tables.include?(table)
+        next if excluded_tables.include?(table)
         next unless requested_tables.empty? || requested_tables.include?(table)
+        primary_key_columns = []
+        primary_key_column_count = db.schema(table).count { |column| column[1][:primary_key] == true }
         schema << "CREATE TABLE #{table}(\n"
         db.schema(table).each do |column|
           schema << "#{column[0]} #{column[1][:type]}"
-          schema << " PRIMARY KEY" if column[1][:primary_key] == true
-          schema << "," unless column == db.schema(table).last
-          schema << "\n"
+          if column[1][:primary_key] == true
+            schema << " PRIMARY KEY" if primary_key_column_count == 1
+          else
+            primary_key_columns << column[0]
+          end
+          schema << ",\n" unless column == db.schema(table).last && primary_key_column_count == 1
+        end
+        if primary_key_column_count > 1
+          schema << "PRIMARY KEY (#{primary_key_columns.join(",")})"
         end
-        schema << ");\n"
         db.foreign_key_list(table).each do |fk|
-          schema << "ALTER TABLE #{table} ADD FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]});\n"
+          schema << ",\n" if fk == db.foreign_key_list(table).first
+          schema << "FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]})"
+          schema << ",\n" unless fk == db.foreign_key_list(table).last
         end
+        schema << ");\n"
       end
       schema
     end

data/lib/langchain/utils/token_length/ai21_validator.rb ADDED Viewed

@@ -0,0 +1,36 @@
+# frozen_string_literal: true
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to AI21's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class AI21Validator < BaseValidator
+        TOKEN_LIMITS = {
+          "j2-ultra" => 8192,
+          "j2-mid" => 8192,
+          "j2-light" => 8192
+        }.freeze
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(text, model_name, client)
+          res = client.tokenize(text)
+          res.dig(:tokens).length
+        end
+        def self.token_limit(model_name)
+          TOKEN_LIMITS[model_name]
+        end
+      end
+    end
+  end
+end

data/lib/langchain/utils/token_length/cohere_validator.rb ADDED Viewed

@@ -0,0 +1,44 @@
+# frozen_string_literal: true
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to Cohere's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class CohereValidator < BaseValidator
+        TOKEN_LIMITS = {
+          # Source:
+          # https://docs.cohere.com/docs/models
+          "command-light" => 4096,
+          "command" => 4096,
+          "base-light" => 2048,
+          "base" => 2048,
+          "embed-english-light-v2.0" => 512,
+          "embed-english-v2.0" => 512,
+          "embed-multilingual-v2.0" => 256,
+          "summarize-medium" => 2048,
+          "summarize-xlarge" => 2048
+        }.freeze
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(text, model_name, client)
+          res = client.tokenize(text: text)
+          res["tokens"].length
+        end
+        def self.token_limit(model_name)
+          TOKEN_LIMITS[model_name]
+        end
+      end
+    end
+  end
+end

data/lib/langchain/vectorsearch/weaviate.rb CHANGED Viewed

@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
     # Initialize the Weaviate adapter
     # @param url [String] The URL of the Weaviate instance
     # @param api_key [String] The API key to use
-    # @param index_name [String] The name of the index to use
+    # @param index_name [String] The capitalized name of the index to use
     # @param llm [Object] The LLM client to use
     def initialize(url:, api_key:, index_name:, llm:)
       depends_on "weaviate-ruby"
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
         url: url,
         api_key: api_key
       )
+      # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
+      # TODO: Capitalize index_name
       @index_name = index_name
       super(llm: llm)
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:)
-      objects = Array(texts).map do |text|
-        {
-          class: index_name,
-          properties: {content: text},
-          vector: llm.embed(text: text)
-        }
-      end
+    def add_texts(texts:, ids:)
       client.objects.batch_create(
-        objects: objects
+        objects: weaviate_objects(texts, ids)
       )
     end
+    # Update a list of texts in the index
+    # @param texts [Array] The list of texts to update
+    # @return [Hash] The response from the server
+    def update_texts(texts:, ids:)
+      uuids = []
+      # Retrieve the UUIDs of the objects to update
+      Array(texts).map.with_index do |text, i|
+        record = client.query.get(
+          class_name: index_name,
+          fields: "_additional { id }",
+          where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
+        )
+        uuids.push record[0].dig("_additional", "id")
+      end
+      # Update the objects
+      texts.map.with_index do |text, i|
+        client.objects.update(
+          class_name: index_name,
+          id: uuids[i],
+          properties: {
+            __id: ids[i].to_s,
+            content: text
+          },
+          vector: llm.embed(text: text)
+        )
+      end
+    end
     # Create default schema
     def create_default_schema
       client.schema.create(
         class_name: index_name,
         vectorizer: "none",
         properties: [
-          # TODO: Allow passing in your own IDs
-          {
-            dataType: ["text"],
-            name: "content"
-          }
+          # __id to be used as a pointer to the original document
+          {dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
+          {dataType: ["text"], name: "content"}
         ]
       )
     end
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
         class_name: index_name,
         near_vector: near_vector,
         limit: k.to_s,
-        fields: "content _additional { id }"
+        fields: "__id content _additional { id }"
       )
     end
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
       llm.chat(prompt: prompt)
     end
+    private
+    def weaviate_objects(texts, ids)
+      Array(texts).map.with_index do |text, i|
+        weaviate_object(text, ids[i])
+      end
+    end
+    def weaviate_object(text, id)
+      {
+        class: index_name,
+        properties: {
+          __id: id.to_s,
+          content: text
+        },
+        vector: llm.embed(text: text)
+      }
+    end
   end
 end

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.5.7"
+  VERSION = "0.6.1"
 end

data/lib/langchain.rb CHANGED Viewed

@@ -74,7 +74,7 @@ module Langchain
   module Agent
     autoload :Base, "langchain/agent/base"
-    autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
+    autoload :ReActAgent, "langchain/agent/react_agent/react_agent.rb"
     autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
   end
@@ -108,9 +108,11 @@ module Langchain
   module Utils
     module TokenLength
       autoload :BaseValidator, "langchain/utils/token_length/base_validator"
-      autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
-      autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
+      autoload :AI21Validator, "langchain/utils/token_length/ai21_validator"
+      autoload :CohereValidator, "langchain/utils/token_length/cohere_validator"
       autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
+      autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
+      autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
     end
   end
@@ -143,7 +145,18 @@ module Langchain
     autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
   end
+  module ActiveRecord
+    autoload :Hooks, "langchain/active_record/hooks"
+  end
+  module OutputParsers
+    autoload :Base, "langchain/output_parsers/base"
+    autoload :StructuredOutputParser, "langchain/output_parsers/structured"
+  end
   module Errors
     class BaseError < StandardError; end
   end
 end
+require "langchain/railtie" if defined?(Rails)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.5.7
+  version: 0.6.1
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-20 00:00:00.000000000 Z
+date: 2023-06-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -52,6 +52,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.0.5
+- !ruby/object:Gem::Dependency
+  name: json-schema
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 4.0.0
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
   requirement: !ruby/object:Gem::Requirement
@@ -114,14 +128,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.2.1
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.2.0
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: chroma-db
   requirement: !ruby/object:Gem::Requirement
@@ -142,14 +156,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.4
+        version: 0.9.5
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.9.4
+        version: 0.9.5
 - !ruby/object:Gem::Dependency
   name: docx
   requirement: !ruby/object:Gem::Requirement
@@ -184,14 +198,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.1
+        version: 0.1.2
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.1
+        version: 0.1.2
 - !ruby/object:Gem::Dependency
   name: google_search_results
   requirement: !ruby/object:Gem::Requirement
@@ -462,15 +476,17 @@ files:
 - Rakefile
 - examples/create_and_manage_few_shot_prompt_templates.rb
 - examples/create_and_manage_prompt_templates.rb
+- examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
 - examples/pdf_store_and_query_with_chroma.rb
 - examples/store_and_query_with_pinecone.rb
 - examples/store_and_query_with_qdrant.rb
 - examples/store_and_query_with_weaviate.rb
 - lefthook.yml
 - lib/langchain.rb
+- lib/langchain/active_record/hooks.rb
 - lib/langchain/agent/base.rb
-- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
-- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml
+- lib/langchain/agent/react_agent/react_agent.rb
+- lib/langchain/agent/react_agent/react_agent_prompt.yaml
 - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
 - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
 - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
@@ -489,6 +505,8 @@ files:
 - lib/langchain/llm/prompts/summarize_template.yaml
 - lib/langchain/llm/replicate.rb
 - lib/langchain/loader.rb
+- lib/langchain/output_parsers/base.rb
+- lib/langchain/output_parsers/structured.rb
 - lib/langchain/processors/base.rb
 - lib/langchain/processors/csv.rb
 - lib/langchain/processors/docx.rb
@@ -502,6 +520,7 @@ files:
 - lib/langchain/prompt/few_shot_prompt_template.rb
 - lib/langchain/prompt/loading.rb
 - lib/langchain/prompt/prompt_template.rb
+- lib/langchain/railtie.rb
 - lib/langchain/tool/base.rb
 - lib/langchain/tool/calculator.rb
 - lib/langchain/tool/database.rb
@@ -509,7 +528,9 @@ files:
 - lib/langchain/tool/ruby_code_interpreter.rb
 - lib/langchain/tool/weather.rb
 - lib/langchain/tool/wikipedia.rb
+- lib/langchain/utils/token_length/ai21_validator.rb
 - lib/langchain/utils/token_length/base_validator.rb
+- lib/langchain/utils/token_length/cohere_validator.rb
 - lib/langchain/utils/token_length/google_palm_validator.rb
 - lib/langchain/utils/token_length/openai_validator.rb
 - lib/langchain/utils/token_length/token_limit_exceeded.rb

/data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} RENAMED Viewed

File without changes