langchainrb 0.5.2 → 0.5.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,15 +5,20 @@ require "json"
5
5
  require "yaml"
6
6
 
7
7
  module Langchain::Prompt
8
+ # Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
9
+ #
10
+ # @abstract
8
11
  class Base
9
12
  def format(**kwargs)
10
13
  raise NotImplementedError
11
14
  end
12
15
 
16
+ # @return [String] the type of the prompt
13
17
  def prompt_type
14
18
  raise NotImplementedError
15
19
  end
16
20
 
21
+ # @return [Hash] a hash representation of the prompt
17
22
  def to_h
18
23
  raise NotImplementedError
19
24
  end
@@ -1,6 +1,51 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Prompt
4
+ # = Few Shot Prompt Templates
5
+ #
6
+ # Create a prompt with a few shot examples:
7
+ #
8
+ # prompt = Langchain::Prompt::FewShotPromptTemplate.new(
9
+ # prefix: "Write antonyms for the following words.",
10
+ # suffix: "Input: <code>{adjective}</code>\nOutput:",
11
+ # example_prompt: Langchain::Prompt::PromptTemplate.new(
12
+ # input_variables: ["input", "output"],
13
+ # template: "Input: {input}\nOutput: {output}"
14
+ # ),
15
+ # examples: [
16
+ # { "input": "happy", "output": "sad" },
17
+ # { "input": "tall", "output": "short" }
18
+ # ],
19
+ # input_variables: ["adjective"]
20
+ # )
21
+ #
22
+ # prompt.format(adjective: "good")
23
+ #
24
+ # # Write antonyms for the following words.
25
+ # #
26
+ # # Input: happy
27
+ # # Output: sad
28
+ # #
29
+ # # Input: tall
30
+ # # Output: short
31
+ # #
32
+ # # Input: good
33
+ # # Output:
34
+ #
35
+ # Save prompt template to JSON file:
36
+ #
37
+ # prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
38
+ #
39
+ # Loading a new prompt template using a JSON file:
40
+ #
41
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
42
+ # prompt.prefix # "Write antonyms for the following words."
43
+ #
44
+ # Loading a new prompt template using a YAML file:
45
+ #
46
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
47
+ # prompt.input_variables #=> ["adjective", "content"]
48
+ #
4
49
  class FewShotPromptTemplate < Base
5
50
  attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator
6
51
 
@@ -1,6 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Prompt
4
+ # = Prompt Templates
5
+ #
6
+ # Create a prompt with one input variable:
7
+ #
8
+ # prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
9
+ # prompt.format(adjective: "funny") # "Tell me a funny joke."
10
+ #
11
+ # Create a prompt with multiple input variables:
12
+ #
13
+ # prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
14
+ # prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
15
+ #
16
+ # Creating a PromptTemplate using just a prompt and no input_variables:
17
+ #
18
+ # prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
19
+ # prompt.input_variables # ["adjective", "content"]
20
+ # prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
21
+ #
22
+ # Save prompt template to JSON file:
23
+ #
24
+ # prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
25
+ #
26
+ # Loading a new prompt template using a JSON file:
27
+ #
28
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
29
+ # prompt.input_variables # ["adjective", "content"]
30
+ #
31
+ # Loading a new prompt template using a YAML file:
32
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
33
+ # prompt.input_variables #=> ["adjective", "content"]
34
+ #
4
35
  class PromptTemplate < Base
5
36
  attr_reader :template, :input_variables, :validate_template
6
37
 
@@ -1,16 +1,53 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Tool
4
+ # = Tools
5
+ #
6
+ # Tools are used by Agents to perform specific tasks. Basically anything is possible with enough code!
7
+ #
8
+ # == Available Tools
9
+ #
10
+ # - {Langchain::Tool::Calculator}: Calculate the result of a math expression
11
+ # - {Langchain::Tool::RubyCodeInterpretor}: Runs ruby code
12
+ # - {Langchain::Tool::Search}: search on Google (via SerpAPI)
13
+ # - {Langchain::Tool::Wikipedia}: search on Wikipedia
14
+ #
15
+ # == Usage
16
+ #
17
+ # 1. Pick the tools you'd like to pass to an Agent and install the gems listed under **Gem Requirements**
18
+ #
19
+ # # To use all 3 tools:
20
+ # gem install eqn
21
+ # gem install google_search_results
22
+ # gem install wikipedia-client
23
+ #
24
+ # 2. Set the environment variables listed under **ENV Requirements**
25
+ #
26
+ # export SERPAPI_API_KEY=paste-your-serpapi-api-key-here
27
+ #
28
+ # 3. Pass the tools when Agent is instantiated.
29
+ #
30
+ # agent = Langchain::Agent::ChainOfThoughtAgent.new(
31
+ # llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
32
+ # llm_api_key: ENV["OPENAI_API_KEY"],
33
+ # tools: ["search", "calculator", "wikipedia"]
34
+ # )
35
+ #
36
+ # 4. Confirm that the Agent is using the Tools you passed in:
37
+ #
38
+ # agent.tools
39
+ # # => ["search", "calculator", "wikipedia"]
40
+ #
41
+ # == Adding Tools
42
+ #
43
+ # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
44
+ # 2. Create a class in the file that inherits from {Langchain::Tool::Base}
45
+ # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
46
+ # 4. Implement `execute(input:)` method in your tool class
47
+ # 5. Add your tool to the {file:README.md}
4
48
  class Base
5
49
  include Langchain::DependencyHelper
6
50
 
7
- # How to add additional Tools?
8
- # 1. Create a new file in lib/tool/your_tool_name.rb
9
- # 2. Create a class in the file that inherits from Langchain::Tool::Base
10
- # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
11
- # 4. Implement `execute(input:)` method in your tool class
12
- # 5. Add your tool to the README.md
13
-
14
51
  #
15
52
  # Returns the NAME constant of the tool
16
53
  #
@@ -20,6 +57,15 @@ module Langchain::Tool
20
57
  self.class.const_get(:NAME)
21
58
  end
22
59
 
60
+ #
61
+ # Returns the DESCRIPTION constant of the tool
62
+ #
63
+ # @return [String] tool description
64
+ #
65
+ def tool_description
66
+ self.class.const_get(:DESCRIPTION)
67
+ end
68
+
23
69
  #
24
70
  # Sets the DESCRIPTION constant of the tool
25
71
  #
@@ -44,7 +90,7 @@ module Langchain::Tool
44
90
  #
45
91
  # @param input [String] input to the tool
46
92
  # @return [String] answer
47
- #
93
+ # @raise NotImplementedError when not implemented
48
94
  def execute(input:)
49
95
  raise NotImplementedError, "Your tool must implement the `#execute(input:)` method that returns a string"
50
96
  end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Utils
5
+ module TokenLength
6
+ #
7
+ # This class is meant to validate the length of the text passed in to Google Palm's API.
8
+ # It is used to validate the token length before the API call is made
9
+ #
10
+ class GooglePalmValidator
11
+ TOKEN_LIMITS = {
12
+ # Source:
13
+ # This data can be pulled when `list_models()` method is called: https://github.com/andreibondarev/google_palm_api#usage
14
+
15
+ # chat-bison-001 is the only model that currently supports countMessageTokens functions
16
+ "chat-bison-001" => {
17
+ "input_token_limit" => 4000, # 4096 is the limit but the countMessageTokens does not return anything higher than 4000
18
+ "output_token_limit" => 1024
19
+ }
20
+ # "text-bison-001" => {
21
+ # "input_token_limit" => 8196,
22
+ # "output_token_limit" => 1024
23
+ # },
24
+ # "embedding-gecko-001" => {
25
+ # "input_token_limit" => 1024
26
+ # }
27
+ }.freeze
28
+
29
+ #
30
+ # Validate the context length of the text
31
+ #
32
+ # @param content [String | Array<String>] The text or array of texts to validate
33
+ # @param model_name [String] The model name to validate against
34
+ # @return [Integer] The number of tokens left before the model's input token limit is reached
35
+ # @raise [TokenLimitExceeded] If the text is too long
36
+ #
37
+ def self.validate_max_tokens!(google_palm_llm, content, model_name)
38
+ text_token_length = if content.is_a?(Array)
39
+ content.sum { |item| token_length(google_palm_llm, item.to_json, model_name) }
40
+ else
41
+ token_length(google_palm_llm, content, model_name)
42
+ end
43
+
44
+ leftover_tokens = TOKEN_LIMITS.dig(model_name, "input_token_limit") - text_token_length
45
+
46
+ # Raise an error even if whole prompt is equal to the model's token limit (leftover_tokens == 0)
47
+ if leftover_tokens <= 0
48
+ raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS.dig(model_name, "input_token_limit")} tokens, but the given text is #{text_token_length} tokens long."
49
+ end
50
+
51
+ leftover_tokens
52
+ end
53
+
54
+ #
55
+ # Calculate token length for a given text and model name
56
+ #
57
+ # @param llm [Langchain::LLM::GooglePalm] The Langchain::LLM::GooglePalm instance
58
+ # @param text [String] The text to calculate the token length for
59
+ # @param model_name [String] The model name to validate against
60
+ # @return [Integer] The token length of the text
61
+ #
62
+ def self.token_length(llm, text, model_name = "chat-bison-001")
63
+ response = llm.client.count_message_tokens(model: model_name, prompt: text)
64
+ response.dig("tokenCount")
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "tiktoken_ruby"
4
+
5
+ module Langchain
6
+ module Utils
7
+ module TokenLength
8
+ #
9
+ # This class is meant to validate the length of the text passed in to OpenAI's API.
10
+ # It is used to validate the token length before the API call is made
11
+ #
12
+ class OpenAIValidator
13
+ TOKEN_LIMITS = {
14
+ # Source:
15
+ # https://platform.openai.com/docs/api-reference/embeddings
16
+ # https://platform.openai.com/docs/models/gpt-4
17
+ "text-embedding-ada-002" => 8191,
18
+ "gpt-3.5-turbo" => 4096,
19
+ "gpt-3.5-turbo-0301" => 4096,
20
+ "text-davinci-003" => 4097,
21
+ "text-davinci-002" => 4097,
22
+ "code-davinci-002" => 8001,
23
+ "gpt-4" => 8192,
24
+ "gpt-4-0314" => 8192,
25
+ "gpt-4-32k" => 32768,
26
+ "gpt-4-32k-0314" => 32768,
27
+ "text-curie-001" => 2049,
28
+ "text-babbage-001" => 2049,
29
+ "text-ada-001" => 2049,
30
+ "davinci" => 2049,
31
+ "curie" => 2049,
32
+ "babbage" => 2049,
33
+ "ada" => 2049
34
+ }.freeze
35
+
36
+ #
37
+ # Calculate the `max_tokens:` parameter to be set by subtracting the prompt's token length from the model's maximum context length
38
+ #
39
+ # @param content [String | Array<String>] The text or array of texts to validate
40
+ # @param model_name [String] The model name to validate against
41
+ # @return [Integer] Whether the text is valid or not
42
+ # @raise [TokenLimitExceeded] If the text is too long
43
+ #
44
+ def self.validate_max_tokens!(content, model_name)
45
+ text_token_length = if content.is_a?(Array)
46
+ content.sum { |item| token_length(item.to_json, model_name) }
47
+ else
48
+ token_length(content, model_name)
49
+ end
50
+
51
+ max_tokens = TOKEN_LIMITS[model_name] - text_token_length
52
+
53
+ # Raise an error even if the whole prompt is equal to the model's token limit (max_tokens == 0) since no response will be returned
54
+ if max_tokens <= 0
55
+ raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
56
+ end
57
+
58
+ max_tokens
59
+ end
60
+
61
+ #
62
+ # Calculate token length for a given text and model name
63
+ #
64
+ # @param text [String] The text to calculate the token length for
65
+ # @param model_name [String] The model name to validate against
66
+ # @return [Integer] The token length of the text
67
+ #
68
+ def self.token_length(text, model_name)
69
+ encoder = Tiktoken.encoding_for_model(model_name)
70
+ encoder.encode(text).length
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
@@ -3,6 +3,88 @@
3
3
  require "forwardable"
4
4
 
5
5
  module Langchain::Vectorsearch
6
+ # = Vector Databases
7
+ # A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
8
+ #
9
+ # == Available vector databases
10
+ #
11
+ # - {Langchain::Vectorsearch::Chroma}
12
+ # - {Langchain::Vectorsearch::Milvus}
13
+ # - {Langchain::Vectorsearch::Pinecone}
14
+ # - {Langchain::Vectorsearch::Qdrant}
15
+ # - {Langchain::Vectorsearch::Weaviate}
16
+ # - {Langchain::Vectorsearch::Pgvector}
17
+ #
18
+ # == Usage
19
+ #
20
+ # 1. Pick a vector database from list.
21
+ # 2. Review its documentation to install the required gems, and create an account, get an API key, etc
22
+ # 3. Instantiate the vector database class:
23
+ #
24
+ # weaviate = Langchain::Vectorsearch::Weaviate.new(
25
+ # url: ENV["WEAVIATE_URL"],
26
+ # api_key: ENV["WEAVIATE_API_KEY"],
27
+ # index_name: "Documents",
28
+ # llm: :openai, # or :cohere, :hugging_face, :google_palm, or :replicate
29
+ # llm_api_key: ENV["OPENAI_API_KEY"] # API key for the selected LLM
30
+ # )
31
+ #
32
+ # # You can instantiate other supported vector databases the same way:
33
+ # milvus = Langchain::Vectorsearch::Milvus.new(...)
34
+ # qdrant = Langchain::Vectorsearch::Qdrant.new(...)
35
+ # pinecone = Langchain::Vectorsearch::Pinecone.new(...)
36
+ # chroma = Langchain::Vectorsearch::Chroma.new(...)
37
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(...)
38
+ #
39
+ # == Schema Creation
40
+ #
41
+ # `create_default_schema()` creates default schema in your vector database.
42
+ #
43
+ # search.create_default_schema
44
+ #
45
+ # (We plan on offering customizable schema creation shortly)
46
+ #
47
+ # == Adding Data
48
+ #
49
+ # You can add data with:
50
+ # 1. `add_data(path:, paths:)` to add any kind of data type
51
+ #
52
+ # my_pdf = Langchain.root.join("path/to/my.pdf")
53
+ # my_text = Langchain.root.join("path/to/my.txt")
54
+ # my_docx = Langchain.root.join("path/to/my.docx")
55
+ # my_csv = Langchain.root.join("path/to/my.csv")
56
+ #
57
+ # search.add_data(paths: [my_pdf, my_text, my_docx, my_csv])
58
+ #
59
+ # 2. `add_texts(texts:)` to only add textual data
60
+ #
61
+ # search.add_texts(
62
+ # texts: [
63
+ # "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
64
+ # "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s"
65
+ # ]
66
+ # )
67
+ #
68
+ # == Retrieving Data
69
+ #
70
+ # `similarity_search_by_vector(embedding:, k:)` searches the vector database for the closest `k` number of embeddings.
71
+ #
72
+ # search.similarity_search_by_vector(
73
+ # embedding: ...,
74
+ # k: # number of results to be retrieved
75
+ # )
76
+ #
77
+ # `vector_store.similarity_search(query:, k:)` generates an embedding for the query and searches the vector database for the closest `k` number of embeddings.
78
+ #
79
+ # search.similarity_search(
80
+ # query: "...",
81
+ # k: # number of results to be retrieved
82
+ # )
83
+ #
84
+ # `ask(question:)` generates an embedding for the passed-in question, searches the vector database for closest embeddings and then passes these as context to the LLM to generate an answer to the question.
85
+ #
86
+ # search.ask(question: "What is lorem ipsum?")
87
+ #
6
88
  class Base
7
89
  include Langchain::DependencyHelper
8
90
  extend Forwardable
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::Vectorsearch
4
+ class Hnswlib < Base
5
+ #
6
+ # Wrapper around HNSW (Hierarchical Navigable Small World) library.
7
+ # HNSWLib is an in-memory vectorstore that can be saved to a file on disk.
8
+ #
9
+ # Gem requirements:
10
+ # gem "hnswlib", "~> 0.8.1"
11
+ #
12
+ # Usage:
13
+ # hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
14
+ #
15
+
16
+ attr_reader :client, :path_to_index
17
+
18
+ #
19
+ # Initialize the HNSW vector search
20
+ #
21
+ # @param llm [Object] The LLM client to use
22
+ # @param path_to_index [String] The local path to the index file, e.g.: "/storage/index.ann"
23
+ # @return [Langchain::Vectorsearch::Hnswlib] Class instance
24
+ #
25
+ def initialize(llm:, path_to_index:)
26
+ depends_on "hnswlib"
27
+ require "hnswlib"
28
+
29
+ super(llm: llm)
30
+
31
+ @client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimension)
32
+ @path_to_index = path_to_index
33
+
34
+ initialize_index
35
+ end
36
+
37
+ #
38
+ # Add a list of texts and corresponding IDs to the index
39
+ #
40
+ # @param texts [Array] The list of texts to add
41
+ # @param ids [Array] The list of corresponding IDs (integers) to the texts
42
+ # @return [Boolean] The response from the HNSW library
43
+ #
44
+ def add_texts(texts:, ids:)
45
+ resize_index(texts.size)
46
+
47
+ Array(texts).each_with_index do |text, i|
48
+ embedding = llm.embed(text: text)
49
+
50
+ client.add_point(embedding, ids[i])
51
+ end
52
+
53
+ client.save_index(path_to_index)
54
+ end
55
+
56
+ #
57
+ # Search for similar texts
58
+ #
59
+ # @param query [String] The text to search for
60
+ # @param k [Integer] The number of results to return
61
+ # @return [Array] Results in the format `[[id1, distance1], [id2, distance2]]`
62
+ #
63
+ def similarity_search(
64
+ query:,
65
+ k: 4
66
+ )
67
+ embedding = llm.embed(text: query)
68
+
69
+ similarity_search_by_vector(
70
+ embedding: embedding,
71
+ k: k
72
+ )
73
+ end
74
+
75
+ #
76
+ # Search for the K nearest neighbors of a given vector
77
+ #
78
+ # @param embedding [Array] The embedding to search for
79
+ # @param k [Integer] The number of results to return
80
+ # @return [Array] Results in the format `[[id1, distance1], [id2, distance2]]`
81
+ #
82
+ def similarity_search_by_vector(
83
+ embedding:,
84
+ k: 4
85
+ )
86
+ client.search_knn(embedding, k)
87
+ end
88
+
89
+ private
90
+
91
+ #
92
+ # Optionally resizes the index if there's no space for new data
93
+ #
94
+ # @param num_of_elements_to_add [Integer] The number of elements to add to the index
95
+ #
96
+ def resize_index(num_of_elements_to_add)
97
+ current_count = client.current_count
98
+
99
+ if (current_count + num_of_elements_to_add) > client.max_elements
100
+ new_size = current_count + num_of_elements_to_add
101
+
102
+ client.resize_index(new_size)
103
+ end
104
+ end
105
+
106
+ #
107
+ # Loads or initializes the new index
108
+ #
109
+ def initialize_index
110
+ if File.exist?(path_to_index)
111
+ client.load_index(path_to_index)
112
+
113
+ Langchain.logger.info("[#{self.class.name}]".blue + ": Successfully loaded the index at \"#{path_to_index}\"")
114
+ else
115
+ # Default max_elements: 100, but we constantly resize the index as new data is written to it
116
+ client.init_index(max_elements: 100)
117
+
118
+ Langchain.logger.info("[#{self.class.name}]".blue + ": Creating a new index at \"#{path_to_index}\"")
119
+ end
120
+ end
121
+ end
122
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.5.2"
4
+ VERSION = "0.5.4"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -6,10 +6,53 @@ require "colorize"
6
6
 
7
7
  require_relative "./langchain/version"
8
8
 
9
+ # Langchain.rb is a library for building LLM-backed Ruby applications. It is an abstraction layer that sits on top of the emerging AI-related tools that makes it easy for developers to consume and string those services together.
10
+ #
11
+ # = Installation
12
+ # Install the gem and add to the application's Gemfile by executing:
13
+ #
14
+ # $ bundle add langchainrb
15
+ #
16
+ # If bundler is not being used to manage dependencies, install the gem by executing:
17
+ #
18
+ # $ gem install langchainrb
19
+ #
20
+ # Require the gem to start using it:
21
+ #
22
+ # require "langchain"
23
+ #
24
+ # = Concepts
25
+ #
26
+ # == Processors
27
+ # Processors load and parse/process various data types such as CSVs, PDFs, Word documents, HTML pages, and others.
28
+ #
29
+ # == Chunkers
30
+ # Chunkers split data based on various available options such as delimiters, chunk sizes or custom-defined functions. Chunkers are used when data needs to be split up before being imported in vector databases.
31
+ #
32
+ # == Prompts
33
+ # Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
34
+ #
35
+ # == Large Language Models (LLMs)
36
+ # LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
37
+ #
38
+ # == Vectorsearch Databases
39
+ # Vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
40
+ #
41
+ # == Embedding
42
+ # Word embedding or word vector is an approach with which we represent documents and words. It is defined as a numeric vector input that allows words with similar meanings to have the same representation. It can approximate meaning and represent a word in a lower dimensional space.
43
+ #
44
+ #
45
+ # = Logging
46
+ #
47
+ # LangChain.rb uses standard logging mechanisms and defaults to :debug level. Most messages are at info level, but we will add debug or warn statements as needed. To show all log messages:
48
+ #
49
+ # Langchain.logger.level = :info
9
50
  module Langchain
10
51
  class << self
52
+ # @return [Logger]
11
53
  attr_accessor :logger
12
54
 
55
+ # @return [Pathname]
13
56
  attr_reader :root
14
57
  end
15
58
 
@@ -19,6 +62,7 @@ module Langchain
19
62
 
20
63
  autoload :Loader, "langchain/loader"
21
64
  autoload :Data, "langchain/data"
65
+ autoload :Chat, "langchain/chat"
22
66
  autoload :DependencyHelper, "langchain/dependency_helper"
23
67
 
24
68
  module Agent
@@ -49,12 +93,18 @@ module Langchain
49
93
  end
50
94
 
51
95
  module Utils
52
- autoload :TokenLengthValidator, "langchain/utils/token_length_validator"
96
+ module TokenLength
97
+ class TokenLimitExceeded < StandardError; end
98
+
99
+ autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
100
+ autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
101
+ end
53
102
  end
54
103
 
55
104
  module Vectorsearch
56
105
  autoload :Base, "langchain/vectorsearch/base"
57
106
  autoload :Chroma, "langchain/vectorsearch/chroma"
107
+ autoload :Hnswlib, "langchain/vectorsearch/hnswlib"
58
108
  autoload :Milvus, "langchain/vectorsearch/milvus"
59
109
  autoload :Pinecone, "langchain/vectorsearch/pinecone"
60
110
  autoload :Pgvector, "langchain/vectorsearch/pgvector"