langchainrb 0.6.3 → 0.6.4

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 73f980d6a7dd67d0112038a8266a05f8b5697e05c98e61a94598d38406de7c8b
-   data.tar.gz: 8abc93ad6da8ad05d76ac35eff9aaab963c33549acb94bda4dd83daddeb71f4d
+   metadata.gz: 78810f63a496c6b98208a9c838cbdae41a8c944879e68f16fc4362de90c49110
+   data.tar.gz: c95d357da62c8120a2a105a94b219ca1f3552f85fff30bb7cb3d40def336baeb
  SHA512:
-   metadata.gz: 7b5450e51ee732a1e2414e3db5f8a46d113d0b537b561f95556756e2854c9bb9175c898388acc2bb8672b2479e647625d3166580b7b1b25eb6cdc86ff6d42aee
-   data.tar.gz: b5843004533f952782946e6a753aa5306c6ad4a5f97887416f8f10f4192ca1f88d00d30624cd62581022314649e5d291d9c1ab46f2bab31f9455860fc533c83d
+   metadata.gz: ee0c549ecebd98ce940b6dc05c8aa2783c265d7cb3903ca30448be0f906e89f353e419b2bb862178fe9081baa002b42fd7aaf88ec244a63beec9bc862e3a9410
+   data.tar.gz: a4b67c5b0d268d6b96622209fe3201c8585bf44d1d44dca0bc061de3f1ba1797e87df61111ddc6565b0d75b23a06677aa3bad6e41fbd4a119ff69f6b11e756ee
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
  ## [Unreleased]
 
+ ## [0.6.4] - 2023-07-01
+ - Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
+ - Introduce `ConversationMemory`
+ - Allow loading multiple files from a directory
+ - Add `get_default_schema()`, `create_default_schema()`, `destroy_default_schema()` missing methods to `Langchain::Vectorsearch::*` classes
+
  ## [0.6.3] - 2023-06-25
  - Add #destroy_default_schema() to Langchain::Vectorsearch::* classes
 
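Each changelog entry above maps to a file diff below. As a quick orientation, here is a minimal sketch of the two user-facing additions together (the `./docs` path and API key are placeholders, not part of the diff):

  require "langchain"

  llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

  # New in 0.6.4: a Loader pointed at a directory loads every supported file in it
  docs = Langchain::Loader.new("./docs").load

  # New in 0.6.4: conversation history lives in a ConversationMemory;
  # :truncate (the default) drops the oldest messages on token overflow
  chat = Langchain::Conversation.new(llm: llm, memory_strategy: :truncate)
  chat.message("Hello!")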
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     langchainrb (0.6.3)
+     langchainrb (0.6.4)
      baran (~> 0.1.6)
      colorize (~> 0.8.1)
      json-schema (~> 4.0.0)
data/examples/conversation_with_openai.rb ADDED
@@ -0,0 +1,52 @@
+ require "langchain"
+ require "reline"
+
+ # gem install reline
+ # or add `gem "reline"` to your Gemfile
+
+ openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+
+ chat = Langchain::Conversation.new(llm: openai)
+ chat.set_context("You are a chatbot from the future")
+
+ DONE = %w[done end eof exit].freeze
+
+ puts "Welcome to the chatbot from the future!"
+
+ def prompt_for_message
+   puts "(multiline input; type 'end' on its own line when done. or exit to exit)"
+
+   user_message = Reline.readmultiline("Question: ", true) do |multiline_input|
+     last = multiline_input.split.last
+     DONE.include?(last)
+   end
+
+   return :noop unless user_message
+
+   lines = user_message.split("\n")
+   if lines.size > 1 && DONE.include?(lines.last)
+     # remove the "done" from the message
+     user_message = lines[0..-2].join("\n")
+   end
+
+   return :exit if DONE.include?(user_message.downcase)
+
+   user_message
+ end
+
+ begin
+   loop do
+     user_message = prompt_for_message
+
+     case user_message
+     when :noop
+       next
+     when :exit
+       break
+     end
+
+     puts chat.message(user_message)
+   end
+ rescue Interrupt
+   exit 0
+ end
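This example file is new in 0.6.4 (it is also added to the gemspec file list in the metadata diff at the bottom). Assuming the reline gem is installed and OPENAI_API_KEY is exported, it should run with `ruby examples/conversation_with_openai.rb`; Ctrl-C exits cleanly via the `rescue Interrupt` at the end.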
data/lib/langchain/conversation.rb CHANGED
@@ -17,10 +17,7 @@ module Langchain
    # end
    #
    class Conversation
-     attr_reader :context, :examples, :messages
-
-     # The least number of tokens we want to be under the limit by
-     TOKEN_LEEWAY = 20
+     attr_reader :options
 
      # Intialize Conversation with a LLM
      #
@@ -31,7 +28,11 @@ module Langchain
        @llm = llm
        @context = nil
        @examples = []
-       @messages = options.delete(:messages) || []
+       @memory = ConversationMemory.new(
+         llm: llm,
+         messages: options.delete(:messages) || [],
+         strategy: options.delete(:memory_strategy)
+       )
        @options = options
        @block = block
      end
@@ -39,59 +40,50 @@ module Langchain
      # Set the context of the conversation. Usually used to set the model's persona.
      # @param message [String] The context of the conversation
      def set_context(message)
-       @context = message
+       @memory.set_context message
      end
 
      # Add examples to the conversation. Used to give the model a sense of the conversation.
      # @param examples [Array<Hash>] The examples to add to the conversation
      def add_examples(examples)
-       @examples.concat examples
+       @memory.add_examples examples
      end
 
      # Message the model with a prompt and return the response.
      # @param message [String] The prompt to message the model with
      # @return [String] The response from the model
      def message(message)
-       append_user_message(message)
+       @memory.append_user_message(message)
        response = llm_response(message)
-       append_ai_message(response)
+       @memory.append_ai_message(response)
        response
      end
 
-     private
-
-     def llm_response(prompt)
-       @llm.chat(messages: @messages, context: @context, examples: @examples, **@options, &@block)
-     rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
-       raise exception if @messages.size == 1
-
-       reduce_messages(exception.token_overflow)
-       retry
+     # Messages from conversation memory
+     # @return [Array<Hash>] The messages from the conversation memory
+     def messages
+       @memory.messages
      end
 
-     def reduce_messages(token_overflow)
-       @messages = @messages.drop_while do |message|
-         proceed = token_overflow > -TOKEN_LEEWAY
-         token_overflow -= token_length(message.to_json, model_name, llm: @llm)
-
-         proceed
-       end
+     # Context from conversation memory
+     # @return [String] Context from conversation memory
+     def context
+       @memory.context
      end
 
-     def append_ai_message(message)
-       @messages << {role: "ai", content: message}
+     # Examples from conversation memory
+     # @return [Array<Hash>] Examples from the conversation memory
+     def examples
+       @memory.examples
      end
 
-     def append_user_message(message)
-       @messages << {role: "user", content: message}
-     end
-
-     def model_name
-       @options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
-     end
+     private
 
-     def token_length(content, model_name, options)
-       @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+     def llm_response(prompt)
+       @llm.chat(messages: @memory.messages, context: @memory.context, examples: @memory.examples, **@options, &@block)
+     rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
+       @memory.reduce_messages(exception)
+       retry
      end
    end
  end
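`Conversation` keeps its public surface (`set_context`, `add_examples`, `message`, and the `messages`/`context`/`examples` readers) but now delegates all state to the new `ConversationMemory`, plucking a `memory_strategy` option out of the constructor options. A minimal sketch of that option (the API key is a placeholder):

  llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

  # :summarize asks the LLM to compress older history on token overflow;
  # the default, :truncate, drops the oldest messages instead
  chat = Langchain::Conversation.new(llm: llm, memory_strategy: :summarize)
  chat.set_context("You are a terse assistant")
  chat.message("Explain vector search in one sentence.")
  chat.messages # => [{role: "user", ...}, {role: "ai", ...}]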
data/lib/langchain/conversation_memory.rb ADDED
@@ -0,0 +1,88 @@
+ # frozen_string_literal: true
+
+ module Langchain
+   class ConversationMemory
+     attr_reader :examples, :messages
+
+     # The least number of tokens we want to be under the limit by
+     TOKEN_LEEWAY = 20
+
+     def initialize(llm:, messages: [], **options)
+       @llm = llm
+       @context = nil
+       @summary = nil
+       @examples = []
+       @messages = messages
+       @strategy = options.delete(:strategy) || :truncate
+       @options = options
+     end
+
+     def set_context(message)
+       @context = message
+     end
+
+     def add_examples(examples)
+       @examples.concat examples
+     end
+
+     def append_ai_message(message)
+       @messages << {role: "ai", content: message}
+     end
+
+     def append_user_message(message)
+       @messages << {role: "user", content: message}
+     end
+
+     def reduce_messages(exception)
+       case @strategy
+       when :truncate
+         truncate_messages(exception)
+       when :summarize
+         summarize_messages
+       else
+         raise "Unknown strategy: #{@options[:strategy]}"
+       end
+     end
+
+     def context
+       return if @context.nil? && @summary.nil?
+
+       [@context, @summary].compact.join("\n")
+     end
+
+     private
+
+     def truncate_messages(exception)
+       raise exception if @messages.size == 1
+
+       token_overflow = exception.token_overflow
+
+       @messages = @messages.drop_while do |message|
+         proceed = token_overflow > -TOKEN_LEEWAY
+         token_overflow -= token_length(message.to_json, model_name, llm: @llm)
+
+         proceed
+       end
+     end
+
+     def summarize_messages
+       history = [@summary, @messages.to_json].compact.join("\n")
+       partitions = [history[0, history.size / 2], history[history.size / 2, history.size]]
+
+       @summary = partitions.map { |messages| @llm.summarize(text: messages.to_json) }.join("\n")
+
+       @messages = [@messages.last]
+     end
+
+     def partition_messages
+     end
+
+     def model_name
+       @llm.class::DEFAULTS[:chat_completion_model_name]
+     end
+
+     def token_length(content, model_name, options)
+       @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+     end
+   end
+ end
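`ConversationMemory` can also be exercised on its own, although `Conversation` normally constructs it. A short sketch, assuming `llm` is any `Langchain::LLM` instance (for `:summarize` it must support `#summarize`):

  memory = Langchain::ConversationMemory.new(llm: llm, strategy: :summarize)
  memory.set_context("You are a helpful assistant")
  memory.append_user_message("What is HNSW?")
  memory.append_ai_message("A graph-based approximate nearest neighbor index.")

  # #context merges the persona with any running summary
  memory.context # => "You are a helpful assistant"

Note that `reduce_messages` is only invoked when the LLM raises `TokenLimitExceeded`: `:truncate` drops the oldest messages, while `:summarize` collapses the history into `@summary` and keeps only the last message.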
data/lib/langchain/llm/base.rb CHANGED
@@ -1,6 +1,8 @@
  # frozen_string_literal: true
 
  module Langchain::LLM
+   class ApiError < StandardError; end
+
    # A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
    #
    # Langchain.rb provides a common interface to interact with all supported LLMs:
data/lib/langchain/llm/cohere.rb CHANGED
@@ -5,7 +5,7 @@ module Langchain::LLM
    # Wrapper around the Cohere API.
    #
    # Gem requirements:
-   # gem "cohere-ruby", "~> 0.9.4"
+   # gem "cohere-ruby", "~> 0.9.5"
    #
    # Usage:
    # cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
data/lib/langchain/llm/openai.rb CHANGED
@@ -125,7 +125,7 @@ module Langchain::LLM
 
      response = client.chat(parameters: parameters)
 
-     raise "Chat completion failed: #{response}" if !response.empty? && response.dig("error")
+     raise Langchain::LLM::ApiError.new "Chat completion failed: #{response.dig("error", "message")}" if !response.empty? && response.dig("error")
 
      unless streaming
        response.dig("choices", 0, "message", "content")
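Chat-completion failures now raise the library's own `Langchain::LLM::ApiError` (introduced in the `llm/base.rb` diff above) carrying the API's `error.message` payload, rather than a bare `RuntimeError` with the whole response. A sketch of rescuing it, assuming an `openai` instance built as in the example file:

  begin
    openai.chat(prompt: "Hello")
  rescue Langchain::LLM::ApiError => e
    warn "Chat completion failed: #{e.message}"
  end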
data/lib/langchain/loader.rb CHANGED
@@ -51,6 +51,13 @@ module Langchain
      !!(@path =~ URI_REGEX)
    end
 
+   # Is the path a directory
+   #
+   # @return [Boolean] true if path is a directory
+   def directory?
+     File.directory?(@path)
+   end
+
    # Load data from a file or URL
    #
    # loader = Langchain::Loader.new("README.md")
@@ -69,15 +76,10 @@ module Langchain
    #
    # @return [Data] data that was loaded
    def load(&block)
-     @raw_data = url? ? load_from_url : load_from_path
+     return process_data(load_from_url, &block) if url?
+     return load_from_directory(&block) if directory?
 
-     data = if block
-       yield @raw_data.read, @options
-     else
-       processor_klass.new(@options).parse(@raw_data)
-     end
-
-     Langchain::Data.new(data, source: @path)
+     process_data(load_from_path, &block)
    end
 
    private
@@ -92,6 +94,27 @@ module Langchain
      File.open(@path)
    end
 
+   def load_from_directory(&block)
+     Dir.glob(File.join(@path, "**/*")).map do |file|
+       # Only load and add to result files with supported extensions
+       Langchain::Loader.new(file, @options).load(&block)
+     rescue
+       UnknownFormatError nil
+     end.flatten.compact
+   end
+
+   def process_data(data, &block)
+     @raw_data = data
+
+     result = if block
+       yield @raw_data.read, @options
+     else
+       processor_klass.new(@options).parse(@raw_data)
+     end
+
+     Langchain::Data.new(result)
+   end
+
    def processor_klass
      raise UnknownFormatError unless (kind = find_processor)
 
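With the new `directory?` branch in `load`, a path pointing at a folder now loads every supported file beneath it, silently skipping unsupported formats, and returns an array of `Langchain::Data` objects, one per file. A sketch of the new call (the `./docs` path is a placeholder):

  # Array<Langchain::Data>, one entry per supported file under ./docs
  data = Langchain::Loader.new("./docs").load

One behavioral note from `process_data` above: `Langchain::Data.new(result)` is no longer passed `source: @path`, so the loaded data's source attribute is left unset.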
data/lib/langchain/utils/token_length/google_palm_validator.rb CHANGED
@@ -37,6 +37,9 @@ module Langchain
    #
    def self.token_length(text, model_name = "chat-bison-001", options)
      response = options[:llm].client.count_message_tokens(model: model_name, prompt: text)
+
+     raise Langchain::LLM::ApiError.new(response["error"]["message"]) unless response["error"].nil?
+
      response.dig("tokenCount")
    end
 
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -98,6 +98,11 @@ module Langchain::Vectorsearch
      @llm = llm
    end
 
+   # Method supported by Vectorsearch DB to retrieve a default schema
+   def get_default_schema
+     raise NotImplementedError, "#{self.class.name} does not support retrieving a default schema"
+   end
+
    # Method supported by Vectorsearch DB to create a default schema
    def create_default_schema
      raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
data/lib/langchain/vectorsearch/chroma.rb CHANGED
@@ -67,10 +67,17 @@ module Langchain::Vectorsearch
      ::Chroma::Resources::Collection.create(index_name)
    end
 
-   # TODO: Uncomment and add the spec
-   # def destroy_default_schema
-   #   ::Chroma::Resources::Collection.delete(index_name)
-   # end
+   # Get the default schema
+   # @return [Hash] The response from the server
+   def get_default_schema
+     ::Chroma::Resources::Collection.get(index_name)
+   end
+
+   # Delete the default schema
+   # @return [Hash] The response from the server
+   def destroy_default_schema
+     ::Chroma::Resources::Collection.delete(index_name)
+   end
 
    # Search for similar texts
    # @param query [String] The text to search for
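As of this release every vectorsearch wrapper is expected to expose the same `get_default_schema` / `create_default_schema` / `destroy_default_schema` trio, falling back to the `NotImplementedError` defaults in `Base` above. A sketch against Chroma (URL and index name are placeholders):

  chroma = Langchain::Vectorsearch::Chroma.new(
    url: ENV["CHROMA_URL"],
    index_name: "documents",
    llm: llm # any Langchain::LLM instance
  )

  chroma.create_default_schema  # create the collection
  chroma.get_default_schema     # fetch it back (new in 0.6.4)
  chroma.destroy_default_schema # delete it (previously a commented-out TODO)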
data/lib/langchain/vectorsearch/hnswlib.rb CHANGED
@@ -10,8 +10,7 @@ module Langchain::Vectorsearch
    # gem "hnswlib", "~> 0.8.1"
    #
    # Usage:
-   # hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, url:, index_name:)
-   #
+   # hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
 
    attr_reader :client, :path_to_index
data/lib/langchain/vectorsearch/milvus.rb CHANGED
@@ -79,7 +79,17 @@ module Langchain::Vectorsearch
      )
    end
 
-   # TODO: Add destroy_default_schema method
+   # Get the default schema
+   # @return [Hash] The response from the server
+   def get_default_schema
+     client.collections.get(collection_name: index_name)
+   end
+
+   # Delete default schema
+   # @return [Hash] The response from the server
+   def destroy_default_schema
+     client.collections.delete(collection_name: index_name)
+   end
 
    def similarity_search(query:, k: 4)
      embedding = llm.embed(text: query)
data/lib/langchain/vectorsearch/pinecone.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
      client.delete_index(index_name)
    end
 
+   # Get the default schema
+   # @return [Pinecone::Vector] The default schema
+   def get_default_schema
+     index
+   end
+
    # Search for similar texts
    # @param query [String] The text to search for
    # @param k [Integer] The number of results to return
data/lib/langchain/vectorsearch/qdrant.rb CHANGED
@@ -32,11 +32,12 @@ module Langchain::Vectorsearch
    # Add a list of texts to the index
    # @param texts [Array] The list of texts to add
    # @return [Hash] The response from the server
-   def add_texts(texts:, ids:)
+   def add_texts(texts:, ids: [])
      batch = {ids: [], vectors: [], payloads: []}
 
      Array(texts).each_with_index do |text, i|
-       batch[:ids].push(ids[i] || SecureRandom.uuid)
+       id = ids[i] || SecureRandom.uuid
+       batch[:ids].push(id)
        batch[:vectors].push(llm.embed(text: text))
        batch[:payloads].push({content: text})
      end
@@ -51,6 +52,12 @@ module Langchain::Vectorsearch
      add_texts(texts: texts, ids: ids)
    end
 
+   # Get the default schema
+   # @return [Hash] The response from the server
+   def get_default_schema
+     client.collections.get(collection_name: index_name)
+   end
+
    # Deletes the default schema
    # @return [Hash] The response from the server
    def destroy_default_schema
@@ -109,7 +116,7 @@ module Langchain::Vectorsearch
    def ask(question:)
      search_results = similarity_search(query: question)
 
-     context = search_results.dig("result").map do |result|
+     context = search_results.map do |result|
        result.dig("payload").to_s
      end
      context = context.join("\n---\n")
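The changelog's `Qdrant#add_texts` fix is the `ids: []` default above: `ids` used to be a required keyword argument, so calling `add_texts` with texts alone raised an `ArgumentError`. A sketch of the fixed call (URL, API key, and index name are placeholders):

  qdrant = Langchain::Vectorsearch::Qdrant.new(
    url: ENV["QDRANT_URL"],
    api_key: ENV["QDRANT_API_KEY"],
    index_name: "documents",
    llm: llm
  )

  # ids are now optional; a SecureRandom.uuid is generated for each text
  qdrant.add_texts(texts: ["Hello world", "Goodbye world"])

`#ask` changes alongside it: similarity_search results are now mapped directly instead of being dug out of a "result" key.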
data/lib/langchain/vectorsearch/weaviate.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
      )
    end
 
+   # Get default schema
+   # @return [Hash] The response from the server
+   def get_default_schema
+     client.schema.get(class_name: index_name)
+   end
+
    # Delete the index
    # @return [Boolean] Whether the index was deleted
    def destroy_default_schema
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Langchain
-   VERSION = "0.6.3"
+   VERSION = "0.6.4"
  end
data/lib/langchain.rb CHANGED
@@ -51,6 +51,7 @@ module Langchain
    autoload :Loader, "langchain/loader"
    autoload :Data, "langchain/data"
    autoload :Conversation, "langchain/conversation"
+   autoload :ConversationMemory, "langchain/conversation_memory"
    autoload :DependencyHelper, "langchain/dependency_helper"
    autoload :ContextualLogger, "langchain/contextual_logger"
 
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
-   version: 0.6.3
+   version: 0.6.4
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-06-26 00:00:00.000000000 Z
+ date: 2023-07-01 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: baran
@@ -474,6 +474,7 @@ files:
  - LICENSE.txt
  - README.md
  - Rakefile
+ - examples/conversation_with_openai.rb
  - examples/create_and_manage_few_shot_prompt_templates.rb
  - examples/create_and_manage_prompt_templates.rb
  - examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
@@ -494,6 +495,7 @@ files:
  - lib/langchain/chunker/text.rb
  - lib/langchain/contextual_logger.rb
  - lib/langchain/conversation.rb
+ - lib/langchain/conversation_memory.rb
  - lib/langchain/data.rb
  - lib/langchain/dependency_helper.rb
  - lib/langchain/llm/ai21.rb