langchainrb 0.5.2 → 0.5.4

data/lib/langchain/prompt/base.rb CHANGED
@@ -5,15 +5,20 @@ require "json"
 require "yaml"
 
 module Langchain::Prompt
+  # Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
+  #
+  # @abstract
   class Base
     def format(**kwargs)
       raise NotImplementedError
     end
 
+    # @return [String] the type of the prompt
    def prompt_type
      raise NotImplementedError
    end
 
+    # @return [Hash] a hash representation of the prompt
    def to_h
      raise NotImplementedError
    end
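
Base is an abstract interface: `format`, `prompt_type`, and `to_h` must be supplied by a subclass (the PromptTemplate and FewShotPromptTemplate classes below do exactly that). A minimal, purely hypothetical subclass, just to show the contract:

    module Langchain::Prompt
      class StaticPrompt < Base # hypothetical, not part of the gem
        def initialize(text)
          @text = text
        end

        def format(**_kwargs)
          @text # a real template would interpolate the keyword arguments
        end

        def prompt_type
          "static"
        end

        def to_h
          {_type: prompt_type, text: @text}
        end
      end
    end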
data/lib/langchain/prompt/few_shot_prompt_template.rb CHANGED
@@ -1,6 +1,51 @@
 # frozen_string_literal: true
 
 module Langchain::Prompt
+  # = Few Shot Prompt Templates
+  #
+  # Create a prompt with a few shot examples:
+  #
+  #     prompt = Langchain::Prompt::FewShotPromptTemplate.new(
+  #       prefix: "Write antonyms for the following words.",
+  #       suffix: "Input: {adjective}\nOutput:",
+  #       example_prompt: Langchain::Prompt::PromptTemplate.new(
+  #         input_variables: ["input", "output"],
+  #         template: "Input: {input}\nOutput: {output}"
+  #       ),
+  #       examples: [
+  #         { "input": "happy", "output": "sad" },
+  #         { "input": "tall", "output": "short" }
+  #       ],
+  #       input_variables: ["adjective"]
+  #     )
+  #
+  #     prompt.format(adjective: "good")
+  #
+  #     # Write antonyms for the following words.
+  #     #
+  #     # Input: happy
+  #     # Output: sad
+  #     #
+  #     # Input: tall
+  #     # Output: short
+  #     #
+  #     # Input: good
+  #     # Output:
+  #
+  # Save prompt template to JSON file:
+  #
+  #     prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
+  #
+  # Loading a new prompt template using a JSON file:
+  #
+  #     prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
+  #     prompt.prefix # "Write antonyms for the following words."
+  #
+  # Loading a new prompt template using a YAML file:
+  #
+  #     prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
+  #     prompt.input_variables #=> ["adjective", "content"]
+  #
   class FewShotPromptTemplate < Base
     attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator
 
data/lib/langchain/prompt/prompt_template.rb CHANGED
@@ -1,6 +1,37 @@
 # frozen_string_literal: true
 
 module Langchain::Prompt
+  # = Prompt Templates
+  #
+  # Create a prompt with one input variable:
+  #
+  #     prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
+  #     prompt.format(adjective: "funny") # "Tell me a funny joke."
+  #
+  # Create a prompt with multiple input variables:
+  #
+  #     prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
+  #     prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
+  #
+  # Creating a PromptTemplate using just a prompt and no input_variables:
+  #
+  #     prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
+  #     prompt.input_variables # ["adjective", "content"]
+  #     prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
+  #
+  # Save prompt template to JSON file:
+  #
+  #     prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
+  #
+  # Loading a new prompt template using a JSON file:
+  #
+  #     prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
+  #     prompt.input_variables # ["adjective", "content"]
+  #
+  # Loading a new prompt template using a YAML file:
+  #
+  #     prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
+  #     prompt.input_variables #=> ["adjective", "content"]
+  #
   class PromptTemplate < Base
     attr_reader :template, :input_variables, :validate_template
 
data/lib/langchain/tool/base.rb CHANGED
@@ -1,16 +1,53 @@
 # frozen_string_literal: true
 
 module Langchain::Tool
+  # = Tools
+  #
+  # Tools are used by Agents to perform specific tasks. Basically anything is possible with enough code!
+  #
+  # == Available Tools
+  #
+  # - {Langchain::Tool::Calculator}: Calculates the result of a math expression
+  # - {Langchain::Tool::RubyCodeInterpretor}: Runs Ruby code
+  # - {Langchain::Tool::Search}: Searches Google (via SerpAPI)
+  # - {Langchain::Tool::Wikipedia}: Searches Wikipedia
+  #
+  # == Usage
+  #
+  # 1. Pick the tools you'd like to pass to an Agent and install the gems listed under **Gem Requirements**
+  #
+  #     # To use all 3 tools:
+  #     gem install eqn
+  #     gem install google_search_results
+  #     gem install wikipedia-client
+  #
+  # 2. Set the environment variables listed under **ENV Requirements**
+  #
+  #     export SERPAPI_API_KEY=paste-your-serpapi-api-key-here
+  #
+  # 3. Pass the tools when the Agent is instantiated.
+  #
+  #     agent = Langchain::Agent::ChainOfThoughtAgent.new(
+  #       llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
+  #       llm_api_key: ENV["OPENAI_API_KEY"],
+  #       tools: ["search", "calculator", "wikipedia"]
+  #     )
+  #
+  # 4. Confirm that the Agent is using the Tools you passed in:
+  #
+  #     agent.tools
+  #     # => ["search", "calculator", "wikipedia"]
+  #
+  # == Adding Tools
+  #
+  # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
+  # 2. Create a class in the file that inherits from {Langchain::Tool::Base}
+  # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
+  # 4. Implement `execute(input:)` method in your tool class
+  # 5. Add your tool to the {file:README.md}
   class Base
     include Langchain::DependencyHelper
 
-    # How to add additional Tools?
-    # 1. Create a new file in lib/tool/your_tool_name.rb
-    # 2. Create a class in the file that inherits from Langchain::Tool::Base
-    # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
-    # 4. Implement `execute(input:)` method in your tool class
-    # 5. Add your tool to the README.md
-
     #
     # Returns the NAME constant of the tool
     #
@@ -20,6 +57,15 @@ module Langchain::Tool
       self.class.const_get(:NAME)
     end
 
+    #
+    # Returns the DESCRIPTION constant of the tool
+    #
+    # @return [String] tool description
+    #
+    def tool_description
+      self.class.const_get(:DESCRIPTION)
+    end
+
     #
     # Sets the DESCRIPTION constant of the tool
     #
@@ -44,7 +90,7 @@ module Langchain::Tool
     #
     # @param input [String] input to the tool
     # @return [String] answer
-    #
+    # @raise NotImplementedError when not implemented
     def execute(input:)
       raise NotImplementedError, "Your tool must implement the `#execute(input:)` method that returns a string"
     end
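
The five "Adding Tools" steps in the new class comment translate into a very small class. A hedged sketch (the weather tool, its NAME, and the canned response are illustrative, not part of this release):

    # lib/langchain/tool/weather.rb (hypothetical)
    module Langchain::Tool
      class Weather < Base
        NAME = "weather"
        DESCRIPTION = "Useful for fetching the current weather. Input should be a city name."

        # Base#tool_name and Base#tool_description resolve these constants via const_get.
        # Base#execute requires this override and expects a String back.
        def execute(input:)
          # A real implementation would call a weather API here.
          "It is currently sunny in #{input}."
        end
      end
    end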
data/lib/langchain/utils/token_length/google_palm_validator.rb ADDED
@@ -0,0 +1,69 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to Google Palm's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class GooglePalmValidator
+        TOKEN_LIMITS = {
+          # Source:
+          # This data can be pulled when `list_models()` method is called: https://github.com/andreibondarev/google_palm_api#usage
+
+          # chat-bison-001 is the only model that currently supports the countMessageTokens function
+          "chat-bison-001" => {
+            "input_token_limit" => 4000, # 4096 is the limit but countMessageTokens does not return anything higher than 4000
+            "output_token_limit" => 1024
+          }
+          # "text-bison-001" => {
+          #   "input_token_limit" => 8196,
+          #   "output_token_limit" => 1024
+          # },
+          # "embedding-gecko-001" => {
+          #   "input_token_limit" => 1024
+          # }
+        }.freeze
+
+        #
+        # Validate the context length of the text
+        #
+        # @param google_palm_llm [Langchain::LLM::GooglePalm] The Langchain::LLM::GooglePalm instance used to count tokens
+        # @param content [String | Array<String>] The text or array of texts to validate
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The number of tokens left over within the model's input token limit
+        # @raise [TokenLimitExceeded] If the text is too long
+        #
+        def self.validate_max_tokens!(google_palm_llm, content, model_name)
+          text_token_length = if content.is_a?(Array)
+            content.sum { |item| token_length(google_palm_llm, item.to_json, model_name) }
+          else
+            token_length(google_palm_llm, content, model_name)
+          end
+
+          leftover_tokens = TOKEN_LIMITS.dig(model_name, "input_token_limit") - text_token_length
+
+          # Raise an error even if the whole prompt is equal to the model's token limit (leftover_tokens == 0)
+          if leftover_tokens <= 0
+            raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS.dig(model_name, "input_token_limit")} tokens, but the given text is #{text_token_length} tokens long."
+          end
+
+          leftover_tokens
+        end
+
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param llm [Langchain::LLM::GooglePalm] The Langchain::LLM::GooglePalm instance
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(llm, text, model_name = "chat-bison-001")
+          response = llm.client.count_message_tokens(model: model_name, prompt: text)
+          response.dig("tokenCount")
+        end
+      end
+    end
+  end
+end
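
GooglePalmValidator is normally invoked by the LLM class before a request is made, but it can also be called directly. A sketch, assuming a configured Langchain::LLM::GooglePalm instance with a valid API key:

    llm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])

    # Returns the number of input tokens still available for "chat-bison-001",
    # or raises Langchain::Utils::TokenLength::TokenLimitExceeded.
    leftover = Langchain::Utils::TokenLength::GooglePalmValidator.validate_max_tokens!(
      llm,
      "What is the meaning of life?",
      "chat-bison-001"
    )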
data/lib/langchain/utils/token_length/openai_validator.rb ADDED
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+require "tiktoken_ruby"
+
+module Langchain
+  module Utils
+    module TokenLength
+      #
+      # This class is meant to validate the length of the text passed in to OpenAI's API.
+      # It is used to validate the token length before the API call is made
+      #
+      class OpenAIValidator
+        TOKEN_LIMITS = {
+          # Source:
+          # https://platform.openai.com/docs/api-reference/embeddings
+          # https://platform.openai.com/docs/models/gpt-4
+          "text-embedding-ada-002" => 8191,
+          "gpt-3.5-turbo" => 4096,
+          "gpt-3.5-turbo-0301" => 4096,
+          "text-davinci-003" => 4097,
+          "text-davinci-002" => 4097,
+          "code-davinci-002" => 8001,
+          "gpt-4" => 8192,
+          "gpt-4-0314" => 8192,
+          "gpt-4-32k" => 32768,
+          "gpt-4-32k-0314" => 32768,
+          "text-curie-001" => 2049,
+          "text-babbage-001" => 2049,
+          "text-ada-001" => 2049,
+          "davinci" => 2049,
+          "curie" => 2049,
+          "babbage" => 2049,
+          "ada" => 2049
+        }.freeze
+
+        #
+        # Calculate the `max_tokens:` parameter to be set, i.e. the model's context length minus the token length of the prompt
+        #
+        # @param content [String | Array<String>] The text or array of texts to validate
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The number of tokens available for the completion
+        # @raise [TokenLimitExceeded] If the text is too long
+        #
+        def self.validate_max_tokens!(content, model_name)
+          text_token_length = if content.is_a?(Array)
+            content.sum { |item| token_length(item.to_json, model_name) }
+          else
+            token_length(content, model_name)
+          end
+
+          max_tokens = TOKEN_LIMITS[model_name] - text_token_length
+
+          # Raise an error even if the whole prompt is equal to the model's token limit (max_tokens == 0) since no response would be returned
+          if max_tokens <= 0
+            raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
+          end
+
+          max_tokens
+        end
+
+        #
+        # Calculate token length for a given text and model name
+        #
+        # @param text [String] The text to calculate the token length for
+        # @param model_name [String] The model name to validate against
+        # @return [Integer] The token length of the text
+        #
+        def self.token_length(text, model_name)
+          encoder = Tiktoken.encoding_for_model(model_name)
+          encoder.encode(text).length
+        end
+      end
+    end
+  end
+end
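
Unlike the Google Palm validator, this one needs no LLM instance, since tiktoken_ruby counts tokens locally. A usage sketch (the prompt strings are illustrative):

    validator = Langchain::Utils::TokenLength::OpenAIValidator

    # gpt-3.5-turbo has a 4096-token context; the return value is what remains
    # for the completion and can be passed as the `max_tokens:` request parameter.
    max_tokens = validator.validate_max_tokens!("Write a haiku about Ruby.", "gpt-3.5-turbo")

    begin
      validator.validate_max_tokens!("word " * 5_000, "gpt-3.5-turbo")
    rescue Langchain::Utils::TokenLength::TokenLimitExceeded => e
      e.message # => "This model's maximum context length is 4096 tokens, ..."
    end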
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -3,6 +3,88 @@
 require "forwardable"
 
 module Langchain::Vectorsearch
+  # = Vector Databases
+  # A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
+  #
+  # == Available vector databases
+  #
+  # - {Langchain::Vectorsearch::Chroma}
+  # - {Langchain::Vectorsearch::Milvus}
+  # - {Langchain::Vectorsearch::Pinecone}
+  # - {Langchain::Vectorsearch::Qdrant}
+  # - {Langchain::Vectorsearch::Weaviate}
+  # - {Langchain::Vectorsearch::Pgvector}
+  #
+  # == Usage
+  #
+  # 1. Pick a vector database from the list above.
+  # 2. Review its documentation to install the required gems, create an account, get an API key, etc.
+  # 3. Instantiate the vector database class:
+  #
+  #     weaviate = Langchain::Vectorsearch::Weaviate.new(
+  #       url: ENV["WEAVIATE_URL"],
+  #       api_key: ENV["WEAVIATE_API_KEY"],
+  #       index_name: "Documents",
+  #       llm: :openai, # or :cohere, :hugging_face, :google_palm, or :replicate
+  #       llm_api_key: ENV["OPENAI_API_KEY"] # API key for the selected LLM
+  #     )
+  #
+  #     # You can instantiate other supported vector databases the same way:
+  #     milvus = Langchain::Vectorsearch::Milvus.new(...)
+  #     qdrant = Langchain::Vectorsearch::Qdrant.new(...)
+  #     pinecone = Langchain::Vectorsearch::Pinecone.new(...)
+  #     chroma = Langchain::Vectorsearch::Chroma.new(...)
+  #     pgvector = Langchain::Vectorsearch::Pgvector.new(...)
+  #
+  # == Schema Creation
+  #
+  # `create_default_schema()` creates the default schema in your vector database.
+  #
+  #     search.create_default_schema
+  #
+  # (We plan on offering customizable schema creation shortly.)
+  #
+  # == Adding Data
+  #
+  # You can add data with:
+  # 1. `add_data(path:, paths:)` to add any kind of data type
+  #
+  #     my_pdf = Langchain.root.join("path/to/my.pdf")
+  #     my_text = Langchain.root.join("path/to/my.txt")
+  #     my_docx = Langchain.root.join("path/to/my.docx")
+  #     my_csv = Langchain.root.join("path/to/my.csv")
+  #
+  #     search.add_data(paths: [my_pdf, my_text, my_docx, my_csv])
+  #
+  # 2. `add_texts(texts:)` to only add textual data
+  #
+  #     search.add_texts(
+  #       texts: [
+  #         "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
+  #         "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s"
+  #       ]
+  #     )
+  #
+  # == Retrieving Data
+  #
+  # `similarity_search_by_vector(embedding:, k:)` searches the vector database for the closest `k` embeddings.
+  #
+  #     search.similarity_search_by_vector(
+  #       embedding: ...,
+  #       k: # number of results to be retrieved
+  #     )
+  #
+  # `similarity_search(query:, k:)` generates an embedding for the query and searches the vector database for the closest `k` embeddings.
+  #
+  #     search.similarity_search(
+  #       query: ...,
+  #       k: # number of results to be retrieved
+  #     )
+  #
+  # `ask(question:)` generates an embedding for the passed-in question, searches the vector database for the closest embeddings and then passes these as context to the LLM to generate an answer to the question.
+  #
+  #     search.ask(question: "What is lorem ipsum?")
+  #
   class Base
     include Langchain::DependencyHelper
     extend Forwardable
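
Taken together, the usage steps documented above form a short ingest-and-query flow. A sketch, assuming a reachable Weaviate instance and an OpenAI API key in the environment:

    search = Langchain::Vectorsearch::Weaviate.new(
      url: ENV["WEAVIATE_URL"],
      api_key: ENV["WEAVIATE_API_KEY"],
      index_name: "Documents",
      llm: :openai,
      llm_api_key: ENV["OPENAI_API_KEY"]
    )

    search.create_default_schema # one-time setup
    search.add_texts(texts: ["Ruby was created by Matz and released in 1995."])

    search.similarity_search(query: "Who created Ruby?", k: 1) # nearest stored chunks
    search.ask(question: "Who created Ruby?")                  # LLM answer grounded in the search results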
data/lib/langchain/vectorsearch/hnswlib.rb ADDED
@@ -0,0 +1,122 @@
+# frozen_string_literal: true
+
+module Langchain::Vectorsearch
+  class Hnswlib < Base
+    #
+    # Wrapper around HNSW (Hierarchical Navigable Small World) library.
+    # HNSWLib is an in-memory vectorstore that can be saved to a file on disk.
+    #
+    # Gem requirements:
+    #     gem "hnswlib", "~> 0.8.1"
+    #
+    # Usage:
+    #     hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
+    #
+
+    attr_reader :client, :path_to_index
+
+    #
+    # Initialize the HNSW vector search
+    #
+    # @param llm [Object] The LLM client to use
+    # @param path_to_index [String] The local path to the index file, e.g.: "/storage/index.ann"
+    # @return [Langchain::Vectorsearch::Hnswlib] Class instance
+    #
+    def initialize(llm:, path_to_index:)
+      depends_on "hnswlib"
+      require "hnswlib"
+
+      super(llm: llm)
+
+      @client = ::Hnswlib::HierarchicalNSW.new(space: DEFAULT_METRIC, dim: llm.default_dimension)
+      @path_to_index = path_to_index
+
+      initialize_index
+    end
+
+    #
+    # Add a list of texts and corresponding IDs to the index
+    #
+    # @param texts [Array] The list of texts to add
+    # @param ids [Array] The list of corresponding IDs (integers) to the texts
+    # @return [Boolean] The response from the HNSW library
+    #
+    def add_texts(texts:, ids:)
+      resize_index(texts.size)
+
+      Array(texts).each_with_index do |text, i|
+        embedding = llm.embed(text: text)
+
+        client.add_point(embedding, ids[i])
+      end
+
+      client.save_index(path_to_index)
+    end
+
+    #
+    # Search for similar texts
+    #
+    # @param query [String] The text to search for
+    # @param k [Integer] The number of results to return
+    # @return [Array] Results in the format `[[id1, distance1], [id2, distance2]]`
+    #
+    def similarity_search(
+      query:,
+      k: 4
+    )
+      embedding = llm.embed(text: query)
+
+      similarity_search_by_vector(
+        embedding: embedding,
+        k: k
+      )
+    end
+
+    #
+    # Search for the K nearest neighbors of a given vector
+    #
+    # @param embedding [Array] The embedding to search for
+    # @param k [Integer] The number of results to return
+    # @return [Array] Results in the format `[[id1, distance1], [id2, distance2]]`
+    #
+    def similarity_search_by_vector(
+      embedding:,
+      k: 4
+    )
+      client.search_knn(embedding, k)
+    end
+
+    private
+
+    #
+    # Optionally resizes the index if there's no space for new data
+    #
+    # @param num_of_elements_to_add [Integer] The number of elements to add to the index
+    #
+    def resize_index(num_of_elements_to_add)
+      current_count = client.current_count
+
+      if (current_count + num_of_elements_to_add) > client.max_elements
+        new_size = current_count + num_of_elements_to_add
+
+        client.resize_index(new_size)
+      end
+    end
+
+    #
+    # Loads or initializes the new index
+    #
+    def initialize_index
+      if File.exist?(path_to_index)
+        client.load_index(path_to_index)
+
+        Langchain.logger.info("[#{self.class.name}]".blue + ": Successfully loaded the index at \"#{path_to_index}\"")
+      else
+        # Default max_elements: 100, but we constantly resize the index as new data is written to it
+        client.init_index(max_elements: 100)
+
+        Langchain.logger.info("[#{self.class.name}]".blue + ": Creating a new index at \"#{path_to_index}\"")
+      end
+    end
+  end
+end
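
Expanding the one-line usage note in the class comment, an end-to-end sketch (the LLM choice, file path, and texts are illustrative; per the method docs above, results pair ids with distances):

    hnsw = Langchain::Vectorsearch::Hnswlib.new(
      llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
      path_to_index: "./storage/index.ann" # loaded if the file exists, created otherwise
    )

    # IDs must be integers; the index is resized as needed and saved to disk afterwards.
    hnsw.add_texts(texts: ["first document", "second document"], ids: [1, 2])

    hnsw.similarity_search(query: "first", k: 1) # ids and distances of the nearest neighbors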
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.5.2"
+  VERSION = "0.5.4"
 end
data/lib/langchain.rb CHANGED
@@ -6,10 +6,53 @@ require "colorize"
 
 require_relative "./langchain/version"
 
+# Langchain.rb is a library for building LLM-backed Ruby applications. It is an abstraction layer that sits on top of the emerging AI-related tools and makes it easy for developers to consume and string those services together.
+#
+# = Installation
+# Install the gem and add to the application's Gemfile by executing:
+#
+#     $ bundle add langchainrb
+#
+# If bundler is not being used to manage dependencies, install the gem by executing:
+#
+#     $ gem install langchainrb
+#
+# Require the gem to start using it:
+#
+#     require "langchain"
+#
+# = Concepts
+#
+# == Processors
+# Processors load and parse/process various data types such as CSVs, PDFs, Word documents, HTML pages, and others.
+#
+# == Chunkers
+# Chunkers split data based on various available options such as delimiters, chunk sizes, or custom-defined functions. Chunkers are used when data needs to be split up before being imported into vector databases.
+#
+# == Prompts
+# Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
+#
+# == Large Language Models (LLMs)
+# An LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
+#
+# == Vectorsearch Databases
+# A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
+#
+# == Embedding
+# A word embedding (word vector) is an approach for representing documents and words as numeric vectors, allowing words with similar meanings to have similar representations. Embeddings approximate meaning and can represent a word in a lower-dimensional space.
+#
+# = Logging
+#
+# LangChain.rb uses standard logging mechanisms and defaults to :debug level. Most messages are at info level, but we will add debug or warn statements as needed. To show all log messages:
+#
+#     Langchain.logger.level = :info
 module Langchain
   class << self
+    # @return [Logger]
     attr_accessor :logger
 
+    # @return [Pathname]
    attr_reader :root
  end
 
@@ -19,6 +62,7 @@ module Langchain
 
   autoload :Loader, "langchain/loader"
   autoload :Data, "langchain/data"
+  autoload :Chat, "langchain/chat"
   autoload :DependencyHelper, "langchain/dependency_helper"
 
   module Agent
@@ -49,12 +93,18 @@ module Langchain
   end
 
   module Utils
-    autoload :TokenLengthValidator, "langchain/utils/token_length_validator"
+    module TokenLength
+      class TokenLimitExceeded < StandardError; end
+
+      autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
+      autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
+    end
   end
 
   module Vectorsearch
     autoload :Base, "langchain/vectorsearch/base"
     autoload :Chroma, "langchain/vectorsearch/chroma"
+    autoload :Hnswlib, "langchain/vectorsearch/hnswlib"
     autoload :Milvus, "langchain/vectorsearch/milvus"
     autoload :Pinecone, "langchain/vectorsearch/pinecone"
     autoload :Pgvector, "langchain/vectorsearch/pgvector"