langchainrb 0.5.2 → 0.5.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d36de4206b792714ba9b6773c03272e9638b14caf7140e0bc00c3e767aa5fdef
4
- data.tar.gz: 819fab9de55a34e4e6dc865febc19bb9979df55fa8fc6a753774cf1961c40103
3
+ metadata.gz: 9ff52d2013c31933c6c12e3b824db979122decf0f7a4be2a99ec22da22ecb4f3
4
+ data.tar.gz: f632e80bd634e1ff899216498a13e8dc24073d435754077612f8753f51c9fbcf
5
5
  SHA512:
6
- metadata.gz: 6e180b41bbca96bd5523c276923f223bbebe470314086c6a909df440890793bcc70dbd66ecf59bf5d0fd52426650cc5d2684c56cc8fc643209cc1679527cbef4
7
- data.tar.gz: af5db76c2b22b5c7bdc1170de437921e8464a16566f46a5cad465d69e6da47c97a82f7331a5ea5747840e58acc71463aa8456b03e9bc8851efda7b734e5d23cc
6
+ metadata.gz: e79f3c438f18b90d6f926088a613dfafdf7009e3c9d768adcb4ba097f688731073af1a983ddbeb754e479d82ca34492c085a05d9bb6d2ddcd4996a5be418d03c
7
+ data.tar.gz: 32682afc3ce3d3ce63d351ff10695d97f46f19b725f975ed6901b4970f6254a95c50fc3575bfc3a3003c95c0cd5a955e1ccdbb17e062cbcf65c1fa9f541a286a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.3] - 2023-06-09
4
+ - 🗣️ LLMs
5
+ - Chat message history support for Langchain::LLM::GooglePalm and Langchain::LLM::OpenAI
6
+
3
7
  ## [0.5.2] - 2023-06-07
4
8
  - 🗣️ LLMs
5
9
  - Auto-calculate the max_tokens: setting to be passed on to OpenAI
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.5.2)
4
+ langchainrb (0.5.3)
5
5
  colorize (~> 0.8.1)
6
6
  tiktoken_ruby (~> 0.0.5)
7
7
 
@@ -122,7 +122,7 @@ GEM
122
122
  faraday-retry (1.0.3)
123
123
  faraday_middleware (1.2.0)
124
124
  faraday (~> 1.0)
125
- google_palm_api (0.1.0)
125
+ google_palm_api (0.1.1)
126
126
  faraday (>= 1.0.0)
127
127
  faraday_middleware (>= 1.0.0)
128
128
  google_search_results (2.0.1)
@@ -310,7 +310,7 @@ DEPENDENCIES
310
310
  docx (~> 0.8.0)
311
311
  dotenv-rails (~> 2.7.6)
312
312
  eqn (~> 1.6.5)
313
- google_palm_api (~> 0.1.0)
313
+ google_palm_api (~> 0.1.1)
314
314
  google_search_results (~> 2.0.0)
315
315
  hugging-face (~> 0.3.4)
316
316
  langchainrb!
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- 🦜️🔗 LangChain.rb
1
+ 💎🔗 LangChain.rb
2
2
  ---
3
3
  ⚡ Building applications with LLMs through composability ⚡
4
4
 
@@ -6,7 +6,10 @@
6
6
 
7
7
  :warning: UNDER ACTIVE AND RAPID DEVELOPMENT (MAY BE BUGGY AND UNTESTED)
8
8
 
9
- ![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg) [![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
9
+ ![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg)
10
+ [![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
11
+ [![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
12
+ [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb/blob/main/LICENSE.txt)
10
13
 
11
14
  Langchain.rb is a library that's an abstraction layer on top of many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
12
15
 
@@ -33,6 +36,7 @@ require "langchain"
33
36
  | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
34
37
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
35
38
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
39
+ | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
36
40
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
37
41
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
42
 
@@ -47,6 +51,7 @@ Pick the vector search database you'll be using and instantiate the client:
47
51
  client = Langchain::Vectorsearch::Weaviate.new(
48
52
  url: ENV["WEAVIATE_URL"],
49
53
  api_key: ENV["WEAVIATE_API_KEY"],
54
+ index_name: "",
50
55
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
51
56
  )
52
57
 
@@ -55,6 +60,7 @@ client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
55
60
  client = Langchain::Vectorsearch::Qdrant.new(...) # `gem "qdrant-ruby", "~> 0.9.0"`
56
61
  client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
57
62
  client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
63
+ client = Langchain::Vectorsearch::Pgvector.new(...) # `gem "pgvector", "~> 0.2"`
58
64
  ```
59
65
 
60
66
  ```ruby
@@ -135,17 +141,17 @@ cohere.complete(prompt: "What is the meaning of life?")
135
141
  #### HuggingFace
136
142
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
137
143
  ```ruby
138
- cohere = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
144
+ hugging_face = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
139
145
  ```
140
146
 
141
147
  #### Replicate
142
148
  Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
143
149
  ```ruby
144
- cohere = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
150
+ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
145
151
  ```
146
152
 
147
153
  #### Google PaLM (Pathways Language Model)
148
- Add `"google_palm_api", "~> 0.1.0"` to your Gemfile.
154
+ Add `"google_palm_api", "~> 0.1.1"` to your Gemfile.
149
155
  ```ruby
150
156
  google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
151
157
  ```
data/Rakefile CHANGED
@@ -14,5 +14,4 @@ Rake::Task["spec"].enhance do
14
14
  end
15
15
 
16
16
  YARD::Rake::YardocTask.new do |t|
17
- t.options = ["--fail-on-warning"]
18
17
  end
@@ -1,6 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Agent
4
+ # = Agents
5
+ #
6
+ # Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer it responds with it.
7
+ #
8
+ # Available:
9
+ # - {Langchain::Agent::ChainOfThoughtAgent}
10
+ #
11
+ # @abstract
4
12
  class Base
5
13
  end
6
14
  end
@@ -1,6 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Agent
4
+ # = Chain of Thought Agent
5
+ #
6
+ # llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
7
+ #
8
+ # agent = Langchain::Agent::ChainOfThoughtAgent.new(
9
+ # llm: llm,
10
+ # tools: ["search", "calculator", "wikipedia"]
11
+ # )
12
+ #
13
+ # agent.tools
14
+ # # => ["search", "calculator", "wikipedia"]
15
+ #
16
+ # agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
17
+ # #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
4
18
  class ChainOfThoughtAgent < Base
5
19
  attr_reader :llm, :tools
6
20
 
@@ -1,16 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ #
5
+ # Wrapper around AI21 Studio APIs.
6
+ #
7
+ # Gem requirements:
8
+ # gem "ai21", "~> 0.2.0"
9
+ #
10
+ # Usage:
11
+ # ai21 = Langchain::LLM::AI21.new(api_key:)
12
+ #
4
13
  class AI21 < Base
5
- #
6
- # Wrapper around AI21 Studio APIs.
7
- #
8
- # Gem requirements: gem "ai21", "~> 0.2.0"
9
- #
10
- # Usage:
11
- # ai21 = Langchain::LLM::AI21.new(api_key:)
12
- #
13
-
14
14
  def initialize(api_key:)
15
15
  depends_on "ai21"
16
16
  require "ai21"
@@ -1,31 +1,58 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ # A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
5
+ #
6
+ # Langchain.rb provides a common interface to interact with all supported LLMs:
7
+ #
8
+ # - {Langchain::LLM::AI21}
9
+ # - {Langchain::LLM::Cohere}
10
+ # - {Langchain::LLM::GooglePalm}
11
+ # - {Langchain::LLM::HuggingFace}
12
+ # - {Langchain::LLM::OpenAI}
13
+ # - {Langchain::LLM::Replicate}
14
+ #
15
+ # @abstract
4
16
  class Base
5
17
  include Langchain::DependencyHelper
6
18
 
19
+ # A client for communicating with the LLM
7
20
  attr_reader :client
8
21
 
9
22
  def default_dimension
10
23
  self.class.const_get(:DEFAULTS).dig(:dimension)
11
24
  end
12
25
 
13
- # Method supported by an LLM that generates a response for a given chat-style prompt
26
+ #
27
+ # Generate a chat completion for a given prompt. Parameters will depend on the LLM
28
+ #
29
+ # @raise NotImplementedError if not supported by the LLM
14
30
  def chat(...)
15
31
  raise NotImplementedError, "#{self.class.name} does not support chat"
16
32
  end
17
33
 
18
- # Method supported by an LLM that completes a given prompt
34
+ #
35
+ # Generate a completion for a given prompt. Parameters will depend on the LLM.
36
+ #
37
+ # @raise NotImplementedError if not supported by the LLM
19
38
  def complete(...)
20
39
  raise NotImplementedError, "#{self.class.name} does not support completion"
21
40
  end
22
41
 
23
- # Method supported by an LLM that generates an embedding for a given text or array of texts
42
+ #
43
+ # Generate an embedding for a given text. Parameters depends on the LLM.
44
+ #
45
+ # @raise NotImplementedError if not supported by the LLM
46
+ #
24
47
  def embed(...)
25
48
  raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
26
49
  end
27
50
 
28
- # Method supported by an LLM that summarizes a given text
51
+ #
52
+ # Generate a summary for a given text. Parameters depends on the LLM.
53
+ #
54
+ # @raise NotImplementedError if not supported by the LLM
55
+ #
29
56
  def summarize(...)
30
57
  raise NotImplementedError, "#{self.class.name} does not support summarization"
31
58
  end
@@ -1,16 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ #
5
+ # Wrapper around the Cohere API.
6
+ #
7
+ # Gem requirements:
8
+ # gem "cohere-ruby", "~> 0.9.4"
9
+ #
10
+ # Usage:
11
+ # cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
12
+ #
4
13
  class Cohere < Base
5
- #
6
- # Wrapper around the Cohere API.
7
- #
8
- # Gem requirements: gem "cohere-ruby", "~> 0.9.4"
9
- #
10
- # Usage:
11
- # cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
12
- #
13
-
14
14
  DEFAULTS = {
15
15
  temperature: 0.0,
16
16
  completion_model_name: "base",
@@ -43,6 +43,7 @@ module Langchain::LLM
43
43
  # Generate a completion for a given prompt
44
44
  #
45
45
  # @param prompt [String] The prompt to generate a completion for
46
+ # @param params[:stop_sequences]
46
47
  # @return [Hash] The completion
47
48
  #
48
49
  def complete(prompt:, **params)
@@ -1,11 +1,20 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ #
5
+ # Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
6
+ #
7
+ # Gem requirements:
8
+ # gem "google_palm_api", "~> 0.1.1"
9
+ #
10
+ # Usage:
11
+ # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
12
+ #
4
13
  class GooglePalm < Base
5
14
  #
6
15
  # Wrapper around the Google PaLM (Pathways Language Model) APIs.
7
16
  #
8
- # Gem requirements: gem "google_palm_api", "~> 0.1.0"
17
+ # Gem requirements: gem "google_palm_api", "~> 0.1.1"
9
18
  #
10
19
  # Usage:
11
20
  # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
@@ -40,6 +49,7 @@ module Langchain::LLM
40
49
  # Generate a completion for a given prompt
41
50
  #
42
51
  # @param prompt [String] The prompt to generate a completion for
52
+ # @param params extra parameters passed to GooglePalmAPI::Client#generate_text
43
53
  # @return [String] The completion
44
54
  #
45
55
  def complete(prompt:, **params)
@@ -66,13 +76,19 @@ module Langchain::LLM
66
76
  # Generate a chat completion for a given prompt
67
77
  #
68
78
  # @param prompt [String] The prompt to generate a chat completion for
79
+ # @param messages [Array] The messages that have been sent in the conversation
80
+ # @param params extra parameters passed to GooglePalmAPI::Client#generate_chat_message
69
81
  # @return [String] The chat completion
70
82
  #
71
- def chat(prompt:, **params)
83
+ def chat(prompt: "", messages: [], **params)
84
+ raise ArgumentError.new(":prompt or :messages argument is expected") if prompt.empty? && messages.empty?
85
+
86
+ messages << {author: "0", content: prompt} if !prompt.empty?
87
+
72
88
  # TODO: Figure out how to introduce persisted conversations
73
89
  default_params = {
74
- prompt: prompt,
75
- temperature: DEFAULTS[:temperature]
90
+ temperature: DEFAULTS[:temperature],
91
+ messages: messages
76
92
  }
77
93
 
78
94
  if params[:stop_sequences]
@@ -1,16 +1,16 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ #
5
+ # Wrapper around the HuggingFace Inference API: https://huggingface.co/inference-api
6
+ #
7
+ # Gem requirements:
8
+ # gem "hugging-face", "~> 0.3.4"
9
+ #
10
+ # Usage:
11
+ # hf = Langchain::LLM::HuggingFace.new(api_key: "YOUR_API_KEY")
12
+ #
4
13
  class HuggingFace < Base
5
- #
6
- # Wrapper around the HuggingFace Inference API.
7
- #
8
- # Gem requirements: gem "hugging-face", "~> 0.3.4"
9
- #
10
- # Usage:
11
- # hf = Langchain::LLM::HuggingFace.new(api_key: "YOUR_API_KEY")
12
- #
13
-
14
14
  # The gem does not currently accept other models:
15
15
  # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
16
16
  DEFAULTS = {
@@ -1,16 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ # LLM interface for OpenAI APIs: https://platform.openai.com/overview
5
+ #
6
+ # Gem requirements:
7
+ # gem "ruby-openai", "~> 4.0.0"
8
+ #
9
+ # Usage:
10
+ # openai = Langchain::LLM::OpenAI.new(api_key:, llm_options: {})
11
+ #
4
12
  class OpenAI < Base
5
- #
6
- # Wrapper around OpenAI APIs.
7
- #
8
- # Gem requirements: gem "ruby-openai", "~> 4.0.0"
9
- #
10
- # Usage:
11
- # openai = Langchain::LLM::OpenAI.new(api_key:, llm_options: {})
12
- #
13
-
14
13
  DEFAULTS = {
15
14
  temperature: 0.0,
16
15
  completion_model_name: "text-davinci-003",
@@ -30,6 +29,7 @@ module Langchain::LLM
30
29
  # Generate an embedding for a given text
31
30
  #
32
31
  # @param text [String] The text to generate an embedding for
32
+ # @param params extra parameters passed to OpenAI::Client#embeddings
33
33
  # @return [Array] The embedding
34
34
  #
35
35
  def embed(text:, **params)
@@ -45,6 +45,7 @@ module Langchain::LLM
45
45
  # Generate a completion for a given prompt
46
46
  #
47
47
  # @param prompt [String] The prompt to generate a completion for
48
+ # @param params extra parameters passed to OpenAI::Client#complete
48
49
  # @return [String] The completion
49
50
  #
50
51
  def complete(prompt:, **params)
@@ -61,13 +62,18 @@ module Langchain::LLM
61
62
  # Generate a chat completion for a given prompt
62
63
  #
63
64
  # @param prompt [String] The prompt to generate a chat completion for
65
+ # @param messages [Array] The messages that have been sent in the conversation
66
+ # @param params extra parameters passed to OpenAI::Client#chat
64
67
  # @return [String] The chat completion
65
68
  #
66
- def chat(prompt:, **params)
67
- parameters = compose_parameters DEFAULTS[:chat_completion_model_name], params
69
+ def chat(prompt: "", messages: [], **params)
70
+ raise ArgumentError.new(":prompt or :messages argument is expected") if prompt.empty? && messages.empty?
68
71
 
69
- parameters[:messages] = [{role: "user", content: prompt}]
70
- parameters[:max_tokens] = Langchain::Utils::TokenLengthValidator.validate_max_tokens!(prompt, parameters[:model])
72
+ messages << {role: "user", content: prompt} if !prompt.empty?
73
+
74
+ parameters = compose_parameters DEFAULTS[:chat_completion_model_name], params
75
+ parameters[:messages] = messages
76
+ parameters[:max_tokens] = validate_max_tokens(messages, parameters[:model])
71
77
 
72
78
  response = client.chat(parameters: parameters)
73
79
  response.dig("choices", 0, "message", "content")
@@ -97,5 +103,9 @@ module Langchain::LLM
97
103
 
98
104
  default_params.merge(params)
99
105
  end
106
+
107
+ def validate_max_tokens(messages, model)
108
+ Langchain::Utils::TokenLengthValidator.validate_max_tokens!(messages, model)
109
+ end
100
110
  end
101
111
  end
@@ -1,22 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::LLM
4
+ #
5
+ # Wrapper around Replicate.com LLM provider
6
+ #
7
+ # Gem requirements:
8
+ # gem "replicate-ruby", "~> 0.2.2"
9
+ #
10
+ # Use it directly:
11
+ # replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
12
+ #
13
+ # Or pass it to be used by a vector search DB:
14
+ # chroma = Langchain::Vectorsearch::Chroma.new(
15
+ # url: ENV["CHROMA_URL"],
16
+ # index_name: "...",
17
+ # llm: replicate
18
+ # )
19
+ #
4
20
  class Replicate < Base
5
- #
6
- # Wrapper around Replicate.com LLM provider
7
- #
8
- # Gem requirements: gem "replicate-ruby", "~> 0.2.2"
9
- #
10
- # Use it directly:
11
- # replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
12
- #
13
- # Or pass it to be instantiated by a vector search DB:
14
- # chroma = Vectorsearch::Chroma.new(
15
- # url: ENV["CHROMA_URL"],
16
- # index_name: "...",
17
- # llm: Langchain::LLM::Replicate(api_key: ENV["REPLICATE_API_KEY"])
18
- # )
19
-
20
21
  DEFAULTS = {
21
22
  # TODO: Figure out how to send the temperature to the API
22
23
  temperature: 0.01, # Minimum accepted value
@@ -2,6 +2,7 @@
2
2
 
3
3
  module Langchain
4
4
  module Processors
5
+ # Processors load and parse/process various data types such as CSVs, PDFs, Word documents, HTML pages, and others.
5
6
  class Base
6
7
  include Langchain::DependencyHelper
7
8
 
@@ -5,15 +5,20 @@ require "json"
5
5
  require "yaml"
6
6
 
7
7
  module Langchain::Prompt
8
+ # Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
9
+ #
10
+ # @abstract
8
11
  class Base
9
12
  def format(**kwargs)
10
13
  raise NotImplementedError
11
14
  end
12
15
 
16
+ # @return [String] the type of the prompt
13
17
  def prompt_type
14
18
  raise NotImplementedError
15
19
  end
16
20
 
21
+ # @return [Hash] a hash representation of the prompt
17
22
  def to_h
18
23
  raise NotImplementedError
19
24
  end
@@ -1,6 +1,51 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Prompt
4
+ # = Few Shot Prompt Templates
5
+ #
6
+ # Create a prompt with a few shot examples:
7
+ #
8
+ # prompt = Langchain::Prompt::FewShotPromptTemplate.new(
9
+ # prefix: "Write antonyms for the following words.",
10
+ # suffix: "Input: <code>{adjective}</code>\nOutput:",
11
+ # example_prompt: Langchain::Prompt::PromptTemplate.new(
12
+ # input_variables: ["input", "output"],
13
+ # template: "Input: {input}\nOutput: {output}"
14
+ # ),
15
+ # examples: [
16
+ # { "input": "happy", "output": "sad" },
17
+ # { "input": "tall", "output": "short" }
18
+ # ],
19
+ # input_variables: ["adjective"]
20
+ # )
21
+ #
22
+ # prompt.format(adjective: "good")
23
+ #
24
+ # # Write antonyms for the following words.
25
+ # #
26
+ # # Input: happy
27
+ # # Output: sad
28
+ # #
29
+ # # Input: tall
30
+ # # Output: short
31
+ # #
32
+ # # Input: good
33
+ # # Output:
34
+ #
35
+ # Save prompt template to JSON file:
36
+ #
37
+ # prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
38
+ #
39
+ # Loading a new prompt template using a JSON file:
40
+ #
41
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
42
+ # prompt.prefix # "Write antonyms for the following words."
43
+ #
44
+ # Loading a new prompt template using a YAML file:
45
+ #
46
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
47
+ # prompt.input_variables #=> ["adjective", "content"]
48
+ #
4
49
  class FewShotPromptTemplate < Base
5
50
  attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator
6
51
 
@@ -1,6 +1,37 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Prompt
4
+ # = Prompt Templates
5
+ #
6
+ # Create a prompt with one input variable:
7
+ #
8
+ # prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
9
+ # prompt.format(adjective: "funny") # "Tell me a funny joke."
10
+ #
11
+ # Create a prompt with multiple input variables:
12
+ #
13
+ # prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
14
+ # prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
15
+ #
16
+ # Creating a PromptTemplate using just a prompt and no input_variables:
17
+ #
18
+ # prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
19
+ # prompt.input_variables # ["adjective", "content"]
20
+ # prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
21
+ #
22
+ # Save prompt template to JSON file:
23
+ #
24
+ # prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
25
+ #
26
+ # Loading a new prompt template using a JSON file:
27
+ #
28
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
29
+ # prompt.input_variables # ["adjective", "content"]
30
+ #
31
+ # Loading a new prompt template using a YAML file:
32
+ # prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
33
+ # prompt.input_variables #=> ["adjective", "content"]
34
+ #
4
35
  class PromptTemplate < Base
5
36
  attr_reader :template, :input_variables, :validate_template
6
37
 
@@ -1,16 +1,53 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Tool
4
+ # = Tools
5
+ #
6
+ # Tools are used by Agents to perform specific tasks. Basically anything is possible with enough code!
7
+ #
8
+ # == Available Tools
9
+ #
10
+ # - {Langchain::Tool::Calculator}: Calculate the result of a math expression
11
+ # - {Langchain::Tool::RubyCodeInterpretor}: Runs ruby code
12
+ # - {Langchain::Tool::Search}: search on Google (via SerpAPI)
13
+ # - {Langchain::Tool::Wikipedia}: search on Wikipedia
14
+ #
15
+ # == Usage
16
+ #
17
+ # 1. Pick the tools you'd like to pass to an Agent and install the gems listed under **Gem Requirements**
18
+ #
19
+ # # To use all 3 tools:
20
+ # gem install eqn
21
+ # gem install google_search_results
22
+ # gem install wikipedia-client
23
+ #
24
+ # 2. Set the environment variables listed under **ENV Requirements**
25
+ #
26
+ # export SERPAPI_API_KEY=paste-your-serpapi-api-key-here
27
+ #
28
+ # 3. Pass the tools when Agent is instantiated.
29
+ #
30
+ # agent = Langchain::Agent::ChainOfThoughtAgent.new(
31
+ # llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
32
+ # llm_api_key: ENV["OPENAI_API_KEY"],
33
+ # tools: ["search", "calculator", "wikipedia"]
34
+ # )
35
+ #
36
+ # 4. Confirm that the Agent is using the Tools you passed in:
37
+ #
38
+ # agent.tools
39
+ # # => ["search", "calculator", "wikipedia"]
40
+ #
41
+ # == Adding Tools
42
+ #
43
+ # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
44
+ # 2. Create a class in the file that inherits from {Langchain::Tool::Base}
45
+ # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
46
+ # 4. Implement `execute(input:)` method in your tool class
47
+ # 5. Add your tool to the {file:README.md}
4
48
  class Base
5
49
  include Langchain::DependencyHelper
6
50
 
7
- # How to add additional Tools?
8
- # 1. Create a new file in lib/tool/your_tool_name.rb
9
- # 2. Create a class in the file that inherits from Langchain::Tool::Base
10
- # 3. Add `NAME=` and `DESCRIPTION=` constants in your Tool class
11
- # 4. Implement `execute(input:)` method in your tool class
12
- # 5. Add your tool to the README.md
13
-
14
51
  #
15
52
  # Returns the NAME constant of the tool
16
53
  #
@@ -44,7 +81,7 @@ module Langchain::Tool
44
81
  #
45
82
  # @param input [String] input to the tool
46
83
  # @return [String] answer
47
- #
84
+ # @raise NotImplementedError when not implemented
48
85
  def execute(input:)
49
86
  raise NotImplementedError, "Your tool must implement the `#execute(input:)` method that returns a string"
50
87
  end
@@ -51,13 +51,18 @@ module Langchain
51
51
  #
52
52
  # Calculate the `max_tokens:` parameter to be set by calculating the context length of the text minus the prompt length
53
53
  #
54
- # @param text [String] The text to validate
54
+ # @param content [String | Array<String>] The text or array of texts to validate
55
55
  # @param model_name [String] The model name to validate against
56
56
  # @return [Integer] Whether the text is valid or not
57
57
  # @raise [TokenLimitExceeded] If the text is too long
58
58
  #
59
- def self.validate_max_tokens!(text, model_name)
60
- text_token_length = token_length(text, model_name)
59
+ def self.validate_max_tokens!(content, model_name)
60
+ text_token_length = if content.is_a?(Array)
61
+ content.sum { |item| token_length(item.to_json, model_name) }
62
+ else
63
+ token_length(content, model_name)
64
+ end
65
+
61
66
  max_tokens = TOKEN_LIMITS[model_name] - text_token_length
62
67
 
63
68
  # Raise an error even if the whole prompt is equal to the model's token limit (max_tokens == 0) since no response will be returned
@@ -3,6 +3,88 @@
3
3
  require "forwardable"
4
4
 
5
5
  module Langchain::Vectorsearch
6
+ # = Vector Databases
7
+ # A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
8
+ #
9
+ # == Available vector databases
10
+ #
11
+ # - {Langchain::Vectorsearch::Chroma}
12
+ # - {Langchain::Vectorsearch::Milvus}
13
+ # - {Langchain::Vectorsearch::Pinecone}
14
+ # - {Langchain::Vectorsearch::Qdrant}
15
+ # - {Langchain::Vectorsearch::Weaviate}
16
+ # - {Langchain::Vectorsearch::Pgvector}
17
+ #
18
+ # == Usage
19
+ #
20
+ # 1. Pick a vector database from list.
21
+ # 2. Review its documentation to install the required gems, and create an account, get an API key, etc
22
+ # 3. Instantiate the vector database class:
23
+ #
24
+ # weaviate = Langchain::Vectorsearch::Weaviate.new(
25
+ # url: ENV["WEAVIATE_URL"],
26
+ # api_key: ENV["WEAVIATE_API_KEY"],
27
+ # index_name: "Documents",
28
+ # llm: :openai, # or :cohere, :hugging_face, :google_palm, or :replicate
29
+ # llm_api_key: ENV["OPENAI_API_KEY"] # API key for the selected LLM
30
+ # )
31
+ #
32
+ # # You can instantiate other supported vector databases the same way:
33
+ # milvus = Langchain::Vectorsearch::Milvus.new(...)
34
+ # qdrant = Langchain::Vectorsearch::Qdrant.new(...)
35
+ # pinecone = Langchain::Vectorsearch::Pinecone.new(...)
36
+ # chroma = Langchain::Vectorsearch::Chroma.new(...)
37
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(...)
38
+ #
39
+ # == Schema Creation
40
+ #
41
+ # `create_default_schema()` creates default schema in your vector database.
42
+ #
43
+ # search.create_default_schema
44
+ #
45
+ # (We plan on offering customizable schema creation shortly)
46
+ #
47
+ # == Adding Data
48
+ #
49
+ # You can add data with:
50
+ # 1. `add_data(path:, paths:)` to add any kind of data type
51
+ #
52
+ # my_pdf = Langchain.root.join("path/to/my.pdf")
53
+ # my_text = Langchain.root.join("path/to/my.txt")
54
+ # my_docx = Langchain.root.join("path/to/my.docx")
55
+ # my_csv = Langchain.root.join("path/to/my.csv")
56
+ #
57
+ # search.add_data(paths: [my_pdf, my_text, my_docx, my_csv])
58
+ #
59
+ # 2. `add_texts(texts:)` to only add textual data
60
+ #
61
+ # search.add_texts(
62
+ # texts: [
63
+ # "Lorem Ipsum is simply dummy text of the printing and typesetting industry.",
64
+ # "Lorem Ipsum has been the industry's standard dummy text ever since the 1500s"
65
+ # ]
66
+ # )
67
+ #
68
+ # == Retrieving Data
69
+ #
70
+ # `similarity_search_by_vector(embedding:, k:)` searches the vector database for the closest `k` number of embeddings.
71
+ #
72
+ # search.similarity_search_by_vector(
73
+ # embedding: ...,
74
+ # k: # number of results to be retrieved
75
+ # )
76
+ #
77
+ # `vector_store.similarity_search(query:, k:)` generates an embedding for the query and searches the vector database for the closest `k` number of embeddings.
78
+ #
79
+ # search.similarity_search_by_vector(
80
+ # embedding: ...,
81
+ # k: # number of results to be retrieved
82
+ # )
83
+ #
84
+ # `ask(question:)` generates an embedding for the passed-in question, searches the vector database for closest embeddings and then passes these as context to the LLM to generate an answer to the question.
85
+ #
86
+ # search.ask(question: "What is lorem ipsum?")
87
+ #
6
88
  class Base
7
89
  include Langchain::DependencyHelper
8
90
  extend Forwardable
@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "chroma-db", "~> 0.3.0"
9
9
  #
10
10
  # Usage:
11
- # chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, llm_api_key:, api_key: nil)
11
+ # chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, api_key: nil)
12
12
  #
13
13
 
14
14
  # Initialize the Chroma client
@@ -8,9 +8,17 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "milvus", "~> 0.9.0"
9
9
  #
10
10
  # Usage:
11
- # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
11
+ # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:)
12
12
  #
13
13
 
14
+ #
15
+ # Initialize the Milvus client
16
+ #
17
+ # @param url [String] The URL of the Milvus server
18
+ # @param api_key [String] The API key to use
19
+ # @param index_name [String] The name of the index to use
20
+ # @param llm [Object] The LLM client to use
21
+ #
14
22
  def initialize(url:, index_name:, llm:, api_key: nil)
15
23
  depends_on "milvus"
16
24
  require "milvus"
@@ -21,6 +29,11 @@ module Langchain::Vectorsearch
21
29
  super(llm: llm)
22
30
  end
23
31
 
32
+ #
33
+ # Add a list of texts to the index
34
+ #
35
+ # @param texts [Array] The list of texts to add
36
+ #
24
37
  def add_texts(texts:)
25
38
  client.entities.insert(
26
39
  collection_name: index_name,
@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pgvector", "~> 0.2"
9
9
  #
10
10
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
11
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:)
12
12
  #
13
13
 
14
14
  # The operators supported by the PostgreSQL vector search adapter
@@ -20,10 +20,14 @@ module Langchain::Vectorsearch
20
20
 
21
21
  attr_reader :operator, :quoted_table_name
22
22
 
23
+ #
24
+ # Initialize the PostgreSQL client
25
+ #
23
26
  # @param url [String] The URL of the PostgreSQL database
24
27
  # @param index_name [String] The name of the table to use for the index
25
28
  # @param llm [Object] The LLM client to use
26
29
  # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
30
+ #
27
31
  def initialize(url:, index_name:, llm:, api_key: nil)
28
32
  require "pg"
29
33
  require "pgvector"
@@ -8,14 +8,17 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pinecone", "~> 0.1.6"
9
9
  #
10
10
  # Usage:
11
- # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
11
+ # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:)
12
12
  #
13
13
 
14
+ #
14
15
  # Initialize the Pinecone client
16
+ #
15
17
  # @param environment [String] The environment to use
16
18
  # @param api_key [String] The API key to use
17
19
  # @param index_name [String] The name of the index to use
18
20
  # @param llm [Object] The LLM client to use
21
+ #
19
22
  def initialize(environment:, api_key:, index_name:, llm:)
20
23
  depends_on "pinecone"
21
24
  require "pinecone"
@@ -8,14 +8,17 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "qdrant-ruby", "~> 0.9.0"
9
9
  #
10
10
  # Usage:
11
- # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
11
+ # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:)
12
12
  #
13
13
 
14
+ #
14
15
  # Initialize the Qdrant client
16
+ #
15
17
  # @param url [String] The URL of the Qdrant server
16
18
  # @param api_key [String] The API key to use
17
19
  # @param index_name [String] The name of the index to use
18
20
  # @param llm [Object] The LLM client to use
21
+ #
19
22
  def initialize(url:, api_key:, index_name:, llm:)
20
23
  depends_on "qdrant-ruby"
21
24
  require "qdrant"
@@ -8,14 +8,17 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "weaviate-ruby", "~> 0.8.0"
9
9
  #
10
10
  # Usage:
11
- # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
11
+ # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:)
12
12
  #
13
13
 
14
+ #
14
15
  # Initialize the Weaviate adapter
16
+ #
15
17
  # @param url [String] The URL of the Weaviate instance
16
18
  # @param api_key [String] The API key to use
17
19
  # @param index_name [String] The name of the index to use
18
20
  # @param llm [Object] The LLM client to use
21
+ #
19
22
  def initialize(url:, api_key:, index_name:, llm:)
20
23
  depends_on "weaviate-ruby"
21
24
  require "weaviate"
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.5.2"
4
+ VERSION = "0.5.3"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -6,10 +6,53 @@ require "colorize"
6
6
 
7
7
  require_relative "./langchain/version"
8
8
 
9
+ # Langchain.rb is a library for building LLM-backed Ruby applications. It is an abstraction layer that sits on top of the emerging AI-related tools that makes it easy for developers to consume and string those services together.
10
+ #
11
+ # = Installation
12
+ # Install the gem and add to the application's Gemfile by executing:
13
+ #
14
+ # $ bundle add langchainrb
15
+ #
16
+ # If bundler is not being used to manage dependencies, install the gem by executing:
17
+ #
18
+ # $ gem install langchainrb
19
+ #
20
+ # Require the gem to start using it:
21
+ #
22
+ # require "langchain"
23
+ #
24
+ # = Concepts
25
+ #
26
+ # == Processors
27
+ # Processors load and parse/process various data types such as CSVs, PDFs, Word documents, HTML pages, and others.
28
+ #
29
+ # == Chunkers
30
+ # Chunkers split data based on various available options such as delimiters, chunk sizes or custom-defined functions. Chunkers are used when data needs to be split up before being imported in vector databases.
31
+ #
32
+ # == Prompts
33
+ # Prompts are structured inputs to the LLMs. Prompts provide instructions, context and other user input that LLMs use to generate responses.
34
+ #
35
+ # == Large Language Models (LLMs)
36
+ # LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
37
+ #
38
+ # == Vectorsearch Databases
39
+ # Vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
40
+ #
41
+ # == Embedding
42
+ # Word embedding or word vector is an approach with which we represent documents and words. It is defined as a numeric vector input that allows words with similar meanings to have the same representation. It can approximate meaning and represent a word in a lower dimensional space.
43
+ #
44
+ #
45
+ # = Logging
46
+ #
47
+ # LangChain.rb uses standard logging mechanisms and defaults to :debug level. Most messages are at info level, but we will add debug or warn statements as needed. To show all log messages:
48
+ #
49
+ # Langchain.logger.level = :info
9
50
  module Langchain
10
51
  class << self
52
+ # @return [Logger]
11
53
  attr_accessor :logger
12
54
 
55
+ # @return [Pathname]
13
56
  attr_reader :root
14
57
  end
15
58
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-08 00:00:00.000000000 Z
11
+ date: 2023-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: tiktoken_ruby
@@ -170,14 +170,14 @@ dependencies:
170
170
  requirements:
171
171
  - - "~>"
172
172
  - !ruby/object:Gem::Version
173
- version: 0.1.0
173
+ version: 0.1.1
174
174
  type: :development
175
175
  prerelease: false
176
176
  version_requirements: !ruby/object:Gem::Requirement
177
177
  requirements:
178
178
  - - "~>"
179
179
  - !ruby/object:Gem::Version
180
- version: 0.1.0
180
+ version: 0.1.1
181
181
  - !ruby/object:Gem::Dependency
182
182
  name: google_search_results
183
183
  requirement: !ruby/object:Gem::Requirement
@@ -495,7 +495,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
495
495
  - !ruby/object:Gem::Version
496
496
  version: '0'
497
497
  requirements: []
498
- rubygems_version: 3.2.3
498
+ rubygems_version: 3.3.7
499
499
  signing_key:
500
500
  specification_version: 4
501
501
  summary: Build LLM-backed Ruby applications with Ruby's LangChain