langchainrb 0.11.2 → 0.11.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 0d2eb76b006864583607672a93cd81a5c37b259a34a2760f7d686f8923b60b22
- data.tar.gz: 4e307d737f9e519e68c6adb632ff863ca365907789183756fcabe7bf0a79df4f
+ metadata.gz: c678ae75bc25b0501223f5b6ffd396a9159af4d0ddd87ddb1657429ed2ba24ce
+ data.tar.gz: df50ef0a6d9c1a3100153a06084556cac983069d1a38739bd6606f39f63bd332
  SHA512:
- metadata.gz: 757e6fd5733b0365eb5b4309589f8dfab8659f451a8f31e922c7ded5ca1d974ab6d6a9b8178aa773f2b812058ea51db2f1560d18215beb4364c15b953f57648d
- data.tar.gz: 8c7ffe8b7f94c32a0e1afedff943871a097811266981e5cb0e3353b66f7eebdb387565f3d2292dda665d69dac2ad29584cad14c07c90b9c2def768ec93a403ea
+ metadata.gz: 3ec9f92f4c6221184b7a0a2c118caa6a56e7bc8505a83d5b5acb4daeb769ff90d8822b43c28f57adc06435dd2df5577268721345c4061d3dad6ecb919be18efc
+ data.tar.gz: 53d54b0c6a82082438f2e2f1ca70d097a9b916bc283b72e52ce466b6f012c9624cf094586b17e93eaa49a796bf9911051d3f4b494b9ecc93c3ac6ee6cdc7e8fe
data/CHANGELOG.md CHANGED
@@ -1,4 +1,12 @@
  ## [Unreleased]
+ - New `Langchain::Processors::Pptx` to parse .pptx files
+ - New `Langchain::LLM::Anthropic#chat()` support
+ - Misc fixes
+
+ ## [0.11.3]
+ - New `Langchain::Processors::Pptx` to parse .pptx files
+ - New `Langchain::LLM::Anthropic#chat()` support
+ - Misc fixes

  ## [0.11.2]
  - New `Langchain::Assistant#clear_thread!` and `Langchain::Assistant#instructions=` methods
data/README.md CHANGED
@@ -59,7 +59,7 @@ Langchain.rb wraps supported LLMs in a unified interface allowing you to easily
  | -------- |:------------------:| :-------: | :-----------------: | :-------: | :----------------- |
  | [OpenAI](https://openai.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Including Azure OpenAI |
  | [AI21](https://ai21.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ❌ | ✅ | |
- | [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | | ❌ | |
+ | [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ✅ | ❌ | |
  | [AWS Bedrock](https://aws.amazon.com/bedrock?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ❌ | Provides AWS, Cohere, AI21, Anthropic and Stability AI models |
  | [Cohere](https://cohere.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
  | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
@@ -372,7 +372,7 @@ my_docx = Langchain.root.join("path/to/my.docx")

  client.add_data(paths: [my_pdf, my_text, my_docx])
  ```
- Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml.
+ Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml, pptx.

  Retrieve similar documents based on the query string passed in:
  ```ruby
lib/langchain/assistants/assistant.rb CHANGED
@@ -129,6 +129,7 @@ module Langchain
  #
  # @return [Array] Empty messages array
  def clear_thread!
+ # TODO: Is this a bug? Should we keep the "system" message?
  thread.messages = []
  end

lib/langchain/llm/anthropic.rb CHANGED
@@ -14,12 +14,19 @@ module Langchain::LLM
  DEFAULTS = {
  temperature: 0.0,
  completion_model_name: "claude-2",
+ chat_completion_model_name: "claude-3-sonnet-20240229",
  max_tokens_to_sample: 256
  }.freeze

  # TODO: Implement token length validator for Anthropic
  # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator

+ # Initialize an Anthropic LLM instance
+ #
+ # @param api_key [String] The API key to use
+ # @param llm_options [Hash] Options to pass to the Anthropic client
+ # @param default_options [Hash] Default options to use on every call to LLM, e.g.: { temperature:, completion_model_name:, chat_completion_model_name:, max_tokens_to_sample: }
+ # @return [Langchain::LLM::Anthropic] Langchain::LLM::Anthropic instance
  def initialize(api_key:, llm_options: {}, default_options: {})
  depends_on "anthropic"

@@ -27,17 +34,43 @@ module Langchain::LLM
  @defaults = DEFAULTS.merge(default_options)
  end

- #
  # Generate a completion for a given prompt
  #
- # @param prompt [String] The prompt to generate a completion for
- # @param params [Hash] extra parameters passed to Anthropic::Client#complete
+ # @param prompt [String] Prompt to generate a completion for
+ # @param model [String] The model to use
+ # @param max_tokens_to_sample [Integer] The maximum number of tokens to sample
+ # @param stop_sequences [Array<String>] The stop sequences to use
+ # @param temperature [Float] The temperature to use
+ # @param top_p [Float] The top p value to use
+ # @param top_k [Integer] The top k value to use
+ # @param metadata [Hash] The metadata to use
+ # @param stream [Boolean] Whether to stream the response
  # @return [Langchain::LLM::AnthropicResponse] The completion
- #
- def complete(prompt:, **params)
- parameters = compose_parameters @defaults[:completion_model_name], params
+ def complete(
+ prompt:,
+ model: @defaults[:completion_model_name],
+ max_tokens_to_sample: @defaults[:max_tokens_to_sample],
+ stop_sequences: nil,
+ temperature: @defaults[:temperature],
+ top_p: nil,
+ top_k: nil,
+ metadata: nil,
+ stream: nil
+ )
+ raise ArgumentError.new("model argument is required") if model.empty?
+ raise ArgumentError.new("max_tokens_to_sample argument is required") if max_tokens_to_sample.nil?

- parameters[:prompt] = prompt
+ parameters = {
+ model: model,
+ prompt: prompt,
+ max_tokens_to_sample: max_tokens_to_sample,
+ temperature: temperature
+ }
+ parameters[:stop_sequences] = stop_sequences if stop_sequences
+ parameters[:top_p] = top_p if top_p
+ parameters[:top_k] = top_k if top_k
+ parameters[:metadata] = metadata if metadata
+ parameters[:stream] = stream if stream

  # TODO: Implement token length validator for Anthropic
  # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
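
A minimal usage sketch for the reworked `complete` signature — assuming the `anthropic` gem is installed and an `ANTHROPIC_API_KEY` environment variable is set; the prompt and token budget are illustrative:

```ruby
require "langchain"

llm = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

# model and max_tokens_to_sample fall back to DEFAULTS when omitted;
# the optional parameters (stop_sequences, top_p, top_k, metadata,
# stream) are only merged into the request when supplied.
response = llm.complete(
  prompt: "Name three Ruby web frameworks.",
  max_tokens_to_sample: 128
)
puts response.completion
```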
@@ -46,12 +79,54 @@ module Langchain::LLM
  Langchain::LLM::AnthropicResponse.new(response)
  end

- private
+ # Generate a chat completion for given messages
+ #
+ # @param messages [Array<String>] Input messages
+ # @param model [String] The model that will complete your prompt
+ # @param max_tokens [Integer] Maximum number of tokens to generate before stopping
+ # @param metadata [Hash] Object describing metadata about the request
+ # @param stop_sequences [Array<String>] Custom text sequences that will cause the model to stop generating
+ # @param stream [Boolean] Whether to incrementally stream the response using server-sent events
+ # @param system [String] System prompt
+ # @param temperature [Float] Amount of randomness injected into the response
+ # @param tools [Array<String>] Definitions of tools that the model may use
+ # @param top_k [Integer] Only sample from the top K options for each subsequent token
+ # @param top_p [Float] Use nucleus sampling.
+ # @return [Langchain::LLM::AnthropicResponse] The chat completion
+ def chat(
+ messages: [],
+ model: @defaults[:chat_completion_model_name],
+ max_tokens: @defaults[:max_tokens_to_sample],
+ metadata: nil,
+ stop_sequences: nil,
+ stream: nil,
+ system: nil,
+ temperature: @defaults[:temperature],
+ tools: [],
+ top_k: nil,
+ top_p: nil
+ )
+ raise ArgumentError.new("messages argument is required") if messages.empty?
+ raise ArgumentError.new("model argument is required") if model.empty?
+ raise ArgumentError.new("max_tokens argument is required") if max_tokens.nil?
+
+ parameters = {
+ messages: messages,
+ model: model,
+ max_tokens: max_tokens,
+ temperature: temperature
+ }
+ parameters[:metadata] = metadata if metadata
+ parameters[:stop_sequences] = stop_sequences if stop_sequences
+ parameters[:stream] = stream if stream
+ parameters[:system] = system if system
+ parameters[:tools] = tools if tools.any?
+ parameters[:top_k] = top_k if top_k
+ parameters[:top_p] = top_p if top_p

- def compose_parameters(model, params)
- default_params = {model: model}.merge(@defaults.except(:completion_model_name))
+ response = client.messages(parameters: parameters)

- default_params.merge(params)
+ Langchain::LLM::AnthropicResponse.new(response)
  end

  # TODO: Implement token length validator for Anthropic
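
The new `chat` method targets Anthropic's Messages API. A hedged sketch of calling it — note the docstring above says `Array<String>`, but the Messages API itself expects role/content hashes, which is what the sketch assumes:

```ruby
require "langchain"

llm = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

# The system prompt travels as its own parameter, not as a message.
response = llm.chat(
  messages: [{role: "user", content: "What is an embedding?"}],
  system: "You are a terse technical assistant.",
  max_tokens: 256
)
puts response.chat_completion
```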
lib/langchain/llm/openai.rb CHANGED
@@ -100,7 +100,7 @@ module Langchain::LLM
  end
  # rubocop:enable Style/ArgumentsForwarding

- # Generate a chat completion for a given prompt or messages.
+ # Generate a chat completion for given messages.
  #
  # @param messages [Array<Hash>] List of messages comprising the conversation so far
  # @param model [String] ID of the model to use
lib/langchain/llm/response/anthropic_response.rb CHANGED
@@ -10,6 +10,10 @@ module Langchain::LLM
  completions.first
  end

+ def chat_completion
+ raw_response.dig("content", 0, "text")
+ end
+
  def completions
  [raw_response.dig("completion")]
  end
@@ -26,8 +30,20 @@ module Langchain::LLM
  raw_response.dig("log_id")
  end

+ def prompt_tokens
+ raw_response.dig("usage", "input_tokens").to_i
+ end
+
+ def completion_tokens
+ raw_response.dig("usage", "output_tokens").to_i
+ end
+
+ def total_tokens
+ prompt_tokens + completion_tokens
+ end
+
  def role
- "assistant"
+ raw_response.dig("role")
  end
  end
  end
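
The new usage accessors read the Messages API `usage` block, and `total_tokens` is derived client-side since the payload does not include it. Continuing the hypothetical `response` from the `chat` sketch above:

```ruby
puts response.prompt_tokens     # "usage" -> "input_tokens"
puts response.completion_tokens # "usage" -> "output_tokens"
puts response.total_tokens      # sum of the two, computed locally
puts response.role              # now read from the payload instead of hardcoded "assistant"
```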
lib/langchain/processors/csv.rb CHANGED
@@ -5,15 +5,26 @@ require "csv"
  module Langchain
  module Processors
  class CSV < Base
+ class InvalidChunkMode < StandardError; end
+
  EXTENSIONS = [".csv"]
  CONTENT_TYPES = ["text/csv"]
+ CHUNK_MODE = {
+ row: "row",
+ file: "file"
+ }

  # Parse the document and return the text
  # @param [File] data
- # @return [Array of Hash]
+ # @return [String]
  def parse(data)
- ::CSV.new(data.read, col_sep: separator).map do |row|
- row.map(&:strip)
+ case chunk_mode
+ when CHUNK_MODE[:row]
+ chunk_row(data)
+ when CHUNK_MODE[:file]
+ chunk_file(data)
+ else
+ raise InvalidChunkMode
  end
  end

@@ -22,6 +33,29 @@ module Langchain
  def separator
  @options[:col_sep] || ","
  end
+
+ def chunk_mode
+ if @options[:chunk_mode].to_s.empty?
+ CHUNK_MODE[:row]
+ else
+ raise InvalidChunkMode unless CHUNK_MODE.value?(@options[:chunk_mode])
+
+ @options[:chunk_mode]
+ end
+ end
+
+ def chunk_row(data)
+ ::CSV.new(data.read, col_sep: separator).map do |row|
+ row
+ .compact
+ .map(&:strip)
+ .join(separator)
+ end.join("\n\n")
+ end
+
+ def chunk_file(data)
+ data.read
+ end
  end
  end
  end
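
A sketch of the new `chunk_mode` option, assuming processor options are passed on initialize the way the existing `separator` reader implies. `"row"` (the default) emits one chunk per CSV row with cells stripped and re-joined; `"file"` returns the raw contents as a single chunk; any other value raises `InvalidChunkMode` when `parse` runs:

```ruby
require "langchain"

# One chunk per row, separated by blank lines in the output string:
rows = Langchain::Processors::CSV.new(chunk_mode: "row").parse(File.open("data.csv"))

# The whole file as a single chunk, returned unchanged:
whole = Langchain::Processors::CSV.new(chunk_mode: "file").parse(File.open("data.csv"))
```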
lib/langchain/processors/pptx.rb ADDED
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Langchain
+ module Processors
+ class Pptx < Base
+ EXTENSIONS = [".pptx"]
+ CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
+
+ def initialize(*)
+ depends_on "power_point_pptx"
+ end
+
+ # Parse the document and return the text
+ # @param [File] data
+ # @return [String]
+ def parse(data)
+ presentation = PowerPointPptx::Document.open(data)
+
+ slides = presentation.slides
+ contents = slides.map(&:content)
+ text = contents.map do |sections|
+ sections.map(&:strip).join(" ")
+ end
+
+ text.join("\n\n")
+ end
+ end
+ end
+ end
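
A hedged sketch of the new processor, assuming the `power_point_pptx` gem is installed and a local `deck.pptx` exists:

```ruby
require "langchain"

# Each slide's sections are joined with spaces; slides are separated
# by blank lines, matching the other document processors.
text = Langchain::Processors::Pptx.new.parse(File.open("deck.pptx"))
puts text
```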
lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -16,7 +16,8 @@ module Langchain::Vectorsearch
  # The operators supported by the PostgreSQL vector search adapter
  OPERATORS = {
  "cosine_distance" => "cosine",
- "euclidean_distance" => "euclidean"
+ "euclidean_distance" => "euclidean",
+ "inner_product_distance" => "inner_product"
  }
  DEFAULT_OPERATOR = "cosine_distance"

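
This hunk only extends the operator mapping; how a caller opts into inner-product distance is not shown here, so the sketch below just inspects the supported set:

```ruby
require "langchain"

Langchain::Vectorsearch::Pgvector::OPERATORS.keys
# => ["cosine_distance", "euclidean_distance", "inner_product_distance"]
```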
lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Langchain
- VERSION = "0.11.2"
+ VERSION = "0.11.3"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
- version: 0.11.2
+ version: 0.11.3
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-04-13 00:00:00.000000000 Z
+ date: 2024-04-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: activesupport
@@ -238,16 +238,16 @@ dependencies:
  name: anthropic
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 0.1.0
+ version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 0.1.0
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: aws-sdk-bedrockruntime
  requirement: !ruby/object:Gem::Requirement
@@ -682,6 +682,20 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: power_point_pptx
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.1.0
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.1.0
  description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
  email:
  - andrei.bondarev13@gmail.com
@@ -758,6 +772,7 @@ files:
  - lib/langchain/processors/jsonl.rb
  - lib/langchain/processors/markdown.rb
  - lib/langchain/processors/pdf.rb
+ - lib/langchain/processors/pptx.rb
  - lib/langchain/processors/text.rb
  - lib/langchain/processors/xlsx.rb
  - lib/langchain/prompt.rb