langchainrb 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +53 -25
- data/lib/langchain/assistants/assistant.rb +199 -0
- data/lib/langchain/assistants/message.rb +58 -0
- data/lib/langchain/assistants/thread.rb +34 -0
- data/lib/langchain/chunker/markdown.rb +39 -0
- data/lib/langchain/conversation/memory.rb +1 -6
- data/lib/langchain/conversation.rb +7 -18
- data/lib/langchain/data.rb +4 -3
- data/lib/langchain/llm/ai21.rb +1 -1
- data/lib/langchain/llm/azure.rb +10 -97
- data/lib/langchain/llm/base.rb +1 -0
- data/lib/langchain/llm/cohere.rb +4 -6
- data/lib/langchain/llm/google_palm.rb +2 -0
- data/lib/langchain/llm/google_vertex_ai.rb +12 -10
- data/lib/langchain/llm/openai.rb +104 -160
- data/lib/langchain/llm/replicate.rb +0 -6
- data/lib/langchain/llm/response/anthropic_response.rb +4 -0
- data/lib/langchain/llm/response/google_palm_response.rb +4 -0
- data/lib/langchain/llm/response/ollama_response.rb +5 -1
- data/lib/langchain/llm/response/openai_response.rb +8 -0
- data/lib/langchain/loader.rb +3 -2
- data/lib/langchain/processors/markdown.rb +17 -0
- data/lib/langchain/tool/base.rb +24 -0
- data/lib/langchain/tool/google_search.rb +1 -4
- data/lib/langchain/utils/token_length/ai21_validator.rb +6 -2
- data/lib/langchain/utils/token_length/base_validator.rb +1 -1
- data/lib/langchain/utils/token_length/cohere_validator.rb +6 -2
- data/lib/langchain/utils/token_length/google_palm_validator.rb +5 -1
- data/lib/langchain/utils/token_length/openai_validator.rb +41 -0
- data/lib/langchain/vectorsearch/base.rb +2 -2
- data/lib/langchain/vectorsearch/epsilla.rb +5 -1
- data/lib/langchain/vectorsearch/pinecone.rb +2 -2
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +2 -1
- metadata +10 -5
data/lib/langchain/llm/azure.rb
CHANGED
@@ -4,7 +4,7 @@ module Langchain::LLM
   # LLM interface for Azure OpenAI Service APIs: https://learn.microsoft.com/en-us/azure/ai-services/openai/
   #
   # Gem requirements:
-  # gem "ruby-openai", "~> 6.
+  # gem "ruby-openai", "~> 6.3.0"
   #
   # Usage:
   # openai = Langchain::LLM::Azure.new(api_key:, llm_options: {}, embedding_deployment_url: chat_deployment_url:)
@@ -34,106 +34,19 @@ module Langchain::LLM
       @defaults = DEFAULTS.merge(default_options)
     end

-
-
-
-    # @param text [String] The text to generate an embedding for
-    # @param params extra parameters passed to OpenAI::Client#embeddings
-    # @return [Langchain::LLM::OpenAIResponse] Response object
-    #
-    def embed(text:, **params)
-      parameters = {model: @defaults[:embeddings_model_name], input: text}
-
-      validate_max_tokens(text, parameters[:model])
-
-      response = with_api_error_handling do
-        embed_client.embeddings(parameters: parameters.merge(params))
-      end
-
-      Langchain::LLM::OpenAIResponse.new(response)
+    def embed(...)
+      @client = @embed_client
+      super(...)
     end

-
-
-
-    # @param prompt [String] The prompt to generate a completion for
-    # @param params extra parameters passed to OpenAI::Client#complete
-    # @return [Langchain::LLM::Response::OpenaAI] Response object
-    #
-    def complete(prompt:, **params)
-      parameters = compose_parameters @defaults[:completion_model_name], params
-
-      parameters[:messages] = compose_chat_messages(prompt: prompt)
-      parameters[:max_tokens] = validate_max_tokens(parameters[:messages], parameters[:model])
-
-      response = with_api_error_handling do
-        chat_client.chat(parameters: parameters)
-      end
-
-      Langchain::LLM::OpenAIResponse.new(response)
+    def complete(...)
+      @client = @chat_client
+      super(...)
     end

-
-
-
-    # == Examples
-    #
-    #   # simplest case, just give a prompt
-    #   openai.chat prompt: "When was Ruby first released?"
-    #
-    #   # prompt plus some context about how to respond
-    #   openai.chat context: "You are RubyGPT, a helpful chat bot for helping people learn Ruby", prompt: "Does Ruby have a REPL like IPython?"
-    #
-    #   # full control over messages that get sent, equivilent to the above
-    #   openai.chat messages: [
-    #     {
-    #       role: "system",
-    #       content: "You are RubyGPT, a helpful chat bot for helping people learn Ruby", prompt: "Does Ruby have a REPL like IPython?"
-    #     },
-    #     {
-    #       role: "user",
-    #       content: "When was Ruby first released?"
-    #     }
-    #   ]
-    #
-    #   # few-short prompting with examples
-    #   openai.chat prompt: "When was factory_bot released?",
-    #     examples: [
-    #       {
-    #         role: "user",
-    #         content: "When was Ruby on Rails released?"
-    #       }
-    #       {
-    #         role: "assistant",
-    #         content: "2004"
-    #       },
-    #     ]
-    #
-    # @param prompt [String] The prompt to generate a chat completion for
-    # @param messages [Array<Hash>] The messages that have been sent in the conversation
-    # @param context [String] An initial context to provide as a system message, ie "You are RubyGPT, a helpful chat bot for helping people learn Ruby"
-    # @param examples [Array<Hash>] Examples of messages to provide to the model. Useful for Few-Shot Prompting
-    # @param options [Hash] extra parameters passed to OpenAI::Client#chat
-    # @yield [Hash] Stream responses back one token at a time
-    # @return [Langchain::LLM::OpenAIResponse] Response object
-    #
-    def chat(prompt: "", messages: [], context: "", examples: [], **options, &block)
-      raise ArgumentError.new(":prompt or :messages argument is expected") if prompt.empty? && messages.empty?
-
-      parameters = compose_parameters @defaults[:chat_completion_model_name], options, &block
-      parameters[:messages] = compose_chat_messages(prompt: prompt, messages: messages, context: context, examples: examples)
-
-      if functions
-        parameters[:functions] = functions
-      else
-        parameters[:max_tokens] = validate_max_tokens(parameters[:messages], parameters[:model])
-      end
-
-      response = with_api_error_handling { chat_client.chat(parameters: parameters) }
-
-      return if block
-
-      Langchain::LLM::OpenAIResponse.new(response)
+    def chat(...)
+      @client = @chat_client
+      super(...)
    end
  end
end
data/lib/langchain/llm/base.rb
CHANGED
@@ -11,6 +11,7 @@ module Langchain::LLM
   # - {Langchain::LLM::Azure}
   # - {Langchain::LLM::Cohere}
   # - {Langchain::LLM::GooglePalm}
+  # - {Langchain::LLM::GoogleVertexAi}
   # - {Langchain::LLM::HuggingFace}
   # - {Langchain::LLM::LlamaCpp}
   # - {Langchain::LLM::OpenAI}
data/lib/langchain/llm/cohere.rb
CHANGED
@@ -62,17 +62,15 @@ module Langchain::LLM

       default_params.merge!(params)

-      default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
+      default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], llm: client)

       response = client.generate(**default_params)
       Langchain::LLM::CohereResponse.new response, model: @defaults[:completion_model_name]
     end

-    #
-    def chat
-
-      ::Langchain::Conversation::Response.new(response_text)
-    end
+    # TODO: Implement chat method: https://github.com/andreibondarev/cohere-ruby/issues/11
+    # def chat
+    # end

     # Generate a summary in English for a given text
     #
data/lib/langchain/llm/google_vertex_ai.rb
CHANGED
@@ -21,6 +21,9 @@ module Langchain::LLM
       embeddings_model_name: "textembedding-gecko"
     }.freeze

+    # TODO: Implement token length validation
+    # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::...
+
     # Google Cloud has a project id and a specific region of deployment.
     # For GenAI-related things, a safe choice is us-central1.
     attr_reader :project_id, :client, :region
@@ -135,15 +138,14 @@ module Langchain::LLM
       )
     end

-    def chat(...)
-
-
-
-
-
-
-
-
-    end
+    # def chat(...)
+    #   https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-chathat
+    #   Chat params: https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-chat
+    #   \"temperature\": 0.3,\n"
+    #   + " \"maxDecodeSteps\": 200,\n"
+    #   + " \"topP\": 0.8,\n"
+    #   + " \"topK\": 40\n"
+    #   + "}";
+    # end
   end
 end
data/lib/langchain/llm/openai.rb
CHANGED
@@ -4,156 +4,170 @@ module Langchain::LLM
   # LLM interface for OpenAI APIs: https://platform.openai.com/overview
   #
   # Gem requirements:
-  # gem "ruby-openai", "~> 6.
+  # gem "ruby-openai", "~> 6.3.0"
   #
   # Usage:
-  # openai = Langchain::LLM::OpenAI.new(
-  #
+  # openai = Langchain::LLM::OpenAI.new(
+  #   api_key: ENV["OPENAI_API_KEY"],
+  #   llm_options: {},
+  #   default_options: {}
+  # )
   class OpenAI < Base
     DEFAULTS = {
       n: 1,
       temperature: 0.0,
-      completion_model_name: "gpt-3.5-turbo",
       chat_completion_model_name: "gpt-3.5-turbo",
       embeddings_model_name: "text-embedding-ada-002",
       dimension: 1536
     }.freeze

-    LEGACY_COMPLETION_MODELS = %w[
-      ada
-      babbage
-      curie
-      davinci
-    ].freeze
-
     LENGTH_VALIDATOR = Langchain::Utils::TokenLength::OpenAIValidator

-
+    attr_reader :defaults

+    # Initialize an OpenAI LLM instance
+    #
+    # @param api_key [String] The API key to use
+    # @param client_options [Hash] Options to pass to the OpenAI::Client constructor
     def initialize(api_key:, llm_options: {}, default_options: {})
       depends_on "ruby-openai", req: "openai"

       @client = ::OpenAI::Client.new(access_token: api_key, **llm_options)
+
       @defaults = DEFAULTS.merge(default_options)
     end

-    #
     # Generate an embedding for a given text
     #
     # @param text [String] The text to generate an embedding for
-    # @param
+    # @param model [String] ID of the model to use
+    # @param encoding_format [String] The format to return the embeddings in. Can be either float or base64.
+    # @param user [String] A unique identifier representing your end-user
     # @return [Langchain::LLM::OpenAIResponse] Response object
-
-
-
+    def embed(
+      text:,
+      model: defaults[:embeddings_model_name],
+      encoding_format: nil,
+      user: nil
+    )
+      raise ArgumentError.new("text argument is required") if text.empty?
+      raise ArgumentError.new("model argument is required") if model.empty?
+      raise ArgumentError.new("encoding_format must be either float or base64") if encoding_format && %w[float base64].include?(encoding_format)
+
+      parameters = {
+        input: text,
+        model: model
+      }
+      parameters[:encoding_format] = encoding_format if encoding_format
+      parameters[:user] = user if user

       validate_max_tokens(text, parameters[:model])

       response = with_api_error_handling do
-        client.embeddings(parameters: parameters
+        client.embeddings(parameters: parameters)
       end

       Langchain::LLM::OpenAIResponse.new(response)
     end

-    #
+    # rubocop:disable Style/ArgumentsForwarding
     # Generate a completion for a given prompt
     #
     # @param prompt [String] The prompt to generate a completion for
-    # @param params
-    # @return [Langchain::LLM::
-    #
+    # @param params [Hash] The parameters to pass to the `chat()` method
+    # @return [Langchain::LLM::OpenAIResponse] Response object
     def complete(prompt:, **params)
-
-
-      return legacy_complete(prompt, parameters) if is_legacy_model?(parameters[:model])
-
-      parameters[:messages] = compose_chat_messages(prompt: prompt)
-      parameters[:max_tokens] = validate_max_tokens(parameters[:messages], parameters[:model], parameters[:max_tokens])
-
-      response = with_api_error_handling do
-        client.chat(parameters: parameters)
+      if params[:stop_sequences]
+        params[:stop] = params.delete(:stop_sequences)
       end
-
-
+      # Should we still accept the `messages: []` parameter here?
+      messages = [{role: "user", content: prompt}]
+      chat(messages: messages, **params)
     end
+    # rubocop:enable Style/ArgumentsForwarding

-    #
     # Generate a chat completion for a given prompt or messages.
     #
-    #
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-      parameters =
-      parameters[:
+    # @param messages [Array<Hash>] List of messages comprising the conversation so far
+    # @param model [String] ID of the model to use
+    def chat(
+      messages: [],
+      model: defaults[:chat_completion_model_name],
+      frequency_penalty: nil,
+      logit_bias: nil,
+      logprobs: nil,
+      top_logprobs: nil,
+      max_tokens: nil,
+      n: defaults[:n],
+      presence_penalty: nil,
+      response_format: nil,
+      seed: nil,
+      stop: nil,
+      stream: nil,
+      temperature: defaults[:temperature],
+      top_p: nil,
+      tools: [],
+      tool_choice: nil,
+      user: nil,
+      &block
+    )
+      raise ArgumentError.new("messages argument is required") if messages.empty?
+      raise ArgumentError.new("model argument is required") if model.empty?
+      raise ArgumentError.new("'tool_choice' is only allowed when 'tools' are specified.") if tool_choice && tools.empty?
+
+      parameters = {
+        messages: messages,
+        model: model
+      }
+      parameters[:frequency_penalty] = frequency_penalty if frequency_penalty
+      parameters[:logit_bias] = logit_bias if logit_bias
+      parameters[:logprobs] = logprobs if logprobs
+      parameters[:top_logprobs] = top_logprobs if top_logprobs
+      # TODO: Fix max_tokens validation to account for tools/functions
+      parameters[:max_tokens] = max_tokens if max_tokens # || validate_max_tokens(parameters[:messages], parameters[:model])
+      parameters[:n] = n if n
+      parameters[:presence_penalty] = presence_penalty if presence_penalty
+      parameters[:response_format] = response_format if response_format
+      parameters[:seed] = seed if seed
+      parameters[:stop] = stop if stop
+      parameters[:stream] = stream if stream
+      parameters[:temperature] = temperature if temperature
+      parameters[:top_p] = top_p if top_p
+      parameters[:tools] = tools if tools.any?
+      parameters[:tool_choice] = tool_choice if tool_choice
+      parameters[:user] = user if user
+
+      # TODO: Clean this part up
+      if block
+        @response_chunks = []
+        parameters[:stream] = proc do |chunk, _bytesize|
+          chunk_content = chunk.dig("choices", 0)
+          @response_chunks << chunk
+          yield chunk_content
+        end
+      end

-
-      parameters
-      else
-        parameters[:max_tokens] = validate_max_tokens(parameters[:messages], parameters[:model], parameters[:max_tokens])
+      response = with_api_error_handling do
+        client.chat(parameters: parameters)
       end

-      response = with_api_error_handling { client.chat(parameters: parameters) }
       response = response_from_chunks if block
       reset_response_chunks
+
       Langchain::LLM::OpenAIResponse.new(response)
     end

-    #
     # Generate a summary for a given text
     #
     # @param text [String] The text to generate a summary for
     # @return [String] The summary
-    #
     def summarize(text:)
       prompt_template = Langchain::Prompt.load_from_path(
         file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.yaml")
       )
       prompt = prompt_template.format(text: text)

-      complete(prompt: prompt
-      # Should this return a Langchain::LLM::OpenAIResponse as well?
+      complete(prompt: prompt)
     end

     private
@@ -164,71 +178,6 @@ module Langchain::LLM
       @response_chunks = []
     end

-    def is_legacy_model?(model)
-      LEGACY_COMPLETION_MODELS.any? { |legacy_model| model.include?(legacy_model) }
-    end
-
-    def legacy_complete(prompt, parameters)
-      Langchain.logger.warn "DEPRECATION WARNING: The model #{parameters[:model]} is deprecated. Please use gpt-3.5-turbo instead. Details: https://platform.openai.com/docs/deprecations/2023-07-06-gpt-and-embeddings"
-
-      parameters[:prompt] = prompt
-      parameters[:max_tokens] = validate_max_tokens(prompt, parameters[:model])
-
-      response = with_api_error_handling do
-        client.completions(parameters: parameters)
-      end
-      response.dig("choices", 0, "text")
-    end
-
-    def compose_parameters(model, params, &block)
-      default_params = {model: model, temperature: @defaults[:temperature], n: @defaults[:n]}
-      default_params[:stop] = params.delete(:stop_sequences) if params[:stop_sequences]
-      parameters = default_params.merge(params)
-
-      if block
-        @response_chunks = []
-        parameters[:stream] = proc do |chunk, _bytesize|
-          chunk_content = chunk.dig("choices", 0)
-          @response_chunks << chunk
-          yield chunk_content
-        end
-      end
-
-      parameters
-    end
-
-    def compose_chat_messages(prompt:, messages: [], context: "", examples: [])
-      history = []
-
-      history.concat transform_messages(examples) unless examples.empty?
-
-      history.concat transform_messages(messages) unless messages.empty?
-
-      unless context.nil? || context.empty?
-        history.reject! { |message| message[:role] == "system" }
-        history.prepend({role: "system", content: context})
-      end
-
-      unless prompt.empty?
-        if history.last && history.last[:role] == "user"
-          history.last[:content] += "\n#{prompt}"
-        else
-          history.append({role: "user", content: prompt})
-        end
-      end
-
-      history
-    end
-
-    def transform_messages(messages)
-      messages.map do |message|
-        {
-          role: message[:role],
-          content: message[:content]
-        }
-      end
-    end
-
     def with_api_error_handling
       response = yield
       return if response.empty?
@@ -239,12 +188,7 @@ module Langchain::LLM
     end

     def validate_max_tokens(messages, model, max_tokens = nil)
-      LENGTH_VALIDATOR.validate_max_tokens!(messages, model, max_tokens: max_tokens)
-    end
-
-    def extract_response(response)
-      results = response.dig("choices").map { |choice| choice.dig("message", "content") }
-      (results.size == 1) ? results.first : results
+      LENGTH_VALIDATOR.validate_max_tokens!(messages, model, max_tokens: max_tokens, llm: self)
     end

     def response_from_chunks
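
Net effect of this refactor: complete() is now a thin wrapper that wraps the prompt in a single user message and calls chat(), and chat() exposes the OpenAI Chat Completions parameters directly instead of the old prompt/context/examples keywords. A short sketch based on the signatures above (omitted models fall back to the DEFAULTS entries):

  openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

  # complete() forwards to chat() with messages: [{role: "user", content: prompt}]
  openai.complete(prompt: "When was Ruby first released?")

  # chat() takes raw Chat Completions keywords; passing a block streams chunks
  response = openai.chat(
    messages: [
      {role: "system", content: "You are a helpful assistant"},
      {role: "user", content: "When was Ruby first released?"}
    ],
    temperature: 0.0
  )
  response.chat_completion # content of the first choice, via Langchain::LLM::OpenAIResponse
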
data/lib/langchain/llm/replicate.rb
CHANGED
@@ -77,12 +77,6 @@ module Langchain::LLM
       Langchain::LLM::ReplicateResponse.new(response, model: @defaults[:completion_model_name])
     end

-    # Cohere does not have a dedicated chat endpoint, so instead we call `complete()`
-    def chat(...)
-      response_text = complete(...)
-      ::Langchain::Conversation::Response.new(response_text)
-    end
-
     #
     # Generate a summary for a given text
     #
data/lib/langchain/llm/response/ollama_response.rb
CHANGED
@@ -8,7 +8,7 @@ module Langchain::LLM
     end

     def completion
-
+      completions.first
     end

     def completions
@@ -22,5 +22,9 @@ module Langchain::LLM
     def embeddings
       [raw_response&.dig("embedding")]
     end
+
+    def role
+      "assistant"
+    end
   end
 end
data/lib/langchain/llm/response/openai_response.rb
CHANGED
@@ -16,10 +16,18 @@ module Langchain::LLM
       completions&.dig(0, "message", "content")
     end

+    def role
+      completions&.dig(0, "message", "role")
+    end
+
     def chat_completion
       completion
     end

+    def tool_calls
+      chat_completions&.dig(0, "message", "tool_calls")
+    end
+
     def embedding
       embeddings&.first
     end
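
The new role and tool_calls readers make it possible to inspect tool-use responses without digging into the raw hash. A hypothetical sketch (the tools array is assumed to be built elsewhere):

  response = openai.chat(
    messages: [{role: "user", content: "What is the weather in Boston?"}],
    tools: tools
  )

  response.role       # => "assistant"
  response.tool_calls # => the "tool_calls" array from the first choice, when the model calls a tool
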
data/lib/langchain/loader.rb
CHANGED
@@ -37,9 +37,10 @@ module Langchain
     # @param path [String | Pathname] path to file or URL
     # @param options [Hash] options passed to the processor class used to process the data
     # @return [Langchain::Loader] loader instance
-    def initialize(path, options = {})
+    def initialize(path, options = {}, chunker: Langchain::Chunker::Text)
       @options = options
       @path = path
+      @chunker = chunker
     end

     # Is the path a URL?
@@ -112,7 +113,7 @@ module Langchain
         processor_klass.new(@options).parse(@raw_data)
       end

-      Langchain::Data.new(result)
+      Langchain::Data.new(result, source: @options[:source], chunker: @chunker)
     end

     def processor_klass
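
Loader now threads a chunker class (and the :source option) through to Langchain::Data instead of hard-coding the default. A sketch of the new keyword, assuming the usual Loader#load entry point; the paths are illustrative and Langchain::Chunker::Markdown is the chunker added in this release:

  # default chunker (Langchain::Chunker::Text)
  Langchain::Loader.new("docs/guide.txt").load

  # explicit chunker, forwarded into Langchain::Data along with options[:source]
  Langchain::Loader.new(
    "docs/guide.md",
    {source: "docs/guide.md"},
    chunker: Langchain::Chunker::Markdown
  ).load
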
data/lib/langchain/processors/markdown.rb
ADDED
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class Markdown < Base
+      EXTENSIONS = [".markdown", ".md"]
+      CONTENT_TYPES = ["text/markdown"]
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        data.read
+      end
+    end
+  end
+end
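
Because this processor registers the .md/.markdown extensions and the text/markdown content type, Markdown files can now flow through Langchain::Loader like any other supported format; parse itself simply reads the file. A direct sketch, assuming the Base processor's usual options-hash initializer and an illustrative file name:

  File.open("README.md") do |file|
    Langchain::Processors::Markdown.new.parse(file) # => the file's raw markdown text
  end
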