langchainrb 0.6.3 → 0.6.4

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 73f980d6a7dd67d0112038a8266a05f8b5697e05c98e61a94598d38406de7c8b
-  data.tar.gz: 8abc93ad6da8ad05d76ac35eff9aaab963c33549acb94bda4dd83daddeb71f4d
+  metadata.gz: 78810f63a496c6b98208a9c838cbdae41a8c944879e68f16fc4362de90c49110
+  data.tar.gz: c95d357da62c8120a2a105a94b219ca1f3552f85fff30bb7cb3d40def336baeb
 SHA512:
-  metadata.gz: 7b5450e51ee732a1e2414e3db5f8a46d113d0b537b561f95556756e2854c9bb9175c898388acc2bb8672b2479e647625d3166580b7b1b25eb6cdc86ff6d42aee
-  data.tar.gz: b5843004533f952782946e6a753aa5306c6ad4a5f97887416f8f10f4192ca1f88d00d30624cd62581022314649e5d291d9c1ab46f2bab31f9455860fc533c83d
+  metadata.gz: ee0c549ecebd98ce940b6dc05c8aa2783c265d7cb3903ca30448be0f906e89f353e419b2bb862178fe9081baa002b42fd7aaf88ec244a63beec9bc862e3a9410
+  data.tar.gz: a4b67c5b0d268d6b96622209fe3201c8585bf44d1d44dca0bc061de3f1ba1797e87df61111ddc6565b0d75b23a06677aa3bad6e41fbd4a119ff69f6b11e756ee
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
 ## [Unreleased]
 
+## [0.6.4] - 2023-07-01
+- Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
+- Introduce `ConversationMemory`
+- Allow loading multiple files from a directory
+- Add missing `get_default_schema()`, `create_default_schema()`, and `destroy_default_schema()` methods to `Langchain::Vectorsearch::*` classes
+
 ## [0.6.3] - 2023-06-25
 - Add #destroy_default_schema() to Langchain::Vectorsearch::* classes
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.6.3)
+    langchainrb (0.6.4)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
       json-schema (~> 4.0.0)
data/examples/conversation_with_openai.rb ADDED
@@ -0,0 +1,52 @@
+require "langchain"
+require "reline"
+
+# gem install reline
+# or add `gem "reline"` to your Gemfile
+
+openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+
+chat = Langchain::Conversation.new(llm: openai)
+chat.set_context("You are a chatbot from the future")
+
+DONE = %w[done end eof exit].freeze
+
+puts "Welcome to the chatbot from the future!"
+
+def prompt_for_message
+  puts "(multiline input; type 'end' on its own line when done, or 'exit' to quit)"
+
+  user_message = Reline.readmultiline("Question: ", true) do |multiline_input|
+    last = multiline_input.split.last
+    DONE.include?(last)
+  end
+
+  return :noop unless user_message
+
+  lines = user_message.split("\n")
+  if lines.size > 1 && DONE.include?(lines.last)
+    # remove the "done" from the message
+    user_message = lines[0..-2].join("\n")
+  end
+
+  return :exit if DONE.include?(user_message.downcase)
+
+  user_message
+end
+
+begin
+  loop do
+    user_message = prompt_for_message
+
+    case user_message
+    when :noop
+      next
+    when :exit
+      break
+    end
+
+    puts chat.message(user_message)
+  end
+rescue Interrupt
+  exit 0
+end
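This is the new examples/conversation_with_openai.rb listed in the gem's file manifest below. Assuming `OPENAI_API_KEY` is exported and the reline gem is installed, it should run directly with `ruby examples/conversation_with_openai.rb`.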
data/lib/langchain/conversation.rb CHANGED
@@ -17,10 +17,7 @@ module Langchain
   #   end
   #
   class Conversation
-    attr_reader :context, :examples, :messages
-
-    # The least number of tokens we want to be under the limit by
-    TOKEN_LEEWAY = 20
+    attr_reader :options
 
     # Initialize Conversation with a LLM
     #
@@ -31,7 +28,11 @@ module Langchain
       @llm = llm
       @context = nil
       @examples = []
-      @messages = options.delete(:messages) || []
+      @memory = ConversationMemory.new(
+        llm: llm,
+        messages: options.delete(:messages) || [],
+        strategy: options.delete(:memory_strategy)
+      )
       @options = options
       @block = block
     end
@@ -39,59 +40,50 @@ module Langchain
     # Set the context of the conversation. Usually used to set the model's persona.
     # @param message [String] The context of the conversation
     def set_context(message)
-      @context = message
+      @memory.set_context message
     end
 
     # Add examples to the conversation. Used to give the model a sense of the conversation.
     # @param examples [Array<Hash>] The examples to add to the conversation
     def add_examples(examples)
-      @examples.concat examples
+      @memory.add_examples examples
    end
 
     # Message the model with a prompt and return the response.
     # @param message [String] The prompt to message the model with
     # @return [String] The response from the model
     def message(message)
-      append_user_message(message)
+      @memory.append_user_message(message)
       response = llm_response(message)
-      append_ai_message(response)
+      @memory.append_ai_message(response)
       response
     end
 
-    private
-
-    def llm_response(prompt)
-      @llm.chat(messages: @messages, context: @context, examples: @examples, **@options, &@block)
-    rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
-      raise exception if @messages.size == 1
-
-      reduce_messages(exception.token_overflow)
-      retry
+    # Messages from conversation memory
+    # @return [Array<Hash>] The messages from the conversation memory
+    def messages
+      @memory.messages
     end
 
-    def reduce_messages(token_overflow)
-      @messages = @messages.drop_while do |message|
-        proceed = token_overflow > -TOKEN_LEEWAY
-        token_overflow -= token_length(message.to_json, model_name, llm: @llm)
-
-        proceed
-      end
+    # Context from conversation memory
+    # @return [String] Context from conversation memory
+    def context
+      @memory.context
     end
 
-    def append_ai_message(message)
-      @messages << {role: "ai", content: message}
+    # Examples from conversation memory
+    # @return [Array<Hash>] Examples from the conversation memory
+    def examples
+      @memory.examples
     end
 
-    def append_user_message(message)
-      @messages << {role: "user", content: message}
-    end
-
-    def model_name
-      @options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
-    end
+    private
 
-    def token_length(content, model_name, options)
-      @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+    def llm_response(prompt)
+      @llm.chat(messages: @memory.messages, context: @memory.context, examples: @memory.examples, **@options, &@block)
+    rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
+      @memory.reduce_messages(exception)
+      retry
     end
   end
 end
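Callers keep the same public surface, but history now lives in `ConversationMemory` and can be tuned with the new `memory_strategy:` option consumed in the constructor above. A minimal sketch, assuming an OpenAI key in the environment:

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# :summarize compresses old history via the LLM when the token limit is hit;
# the default strategy (:truncate) drops the oldest messages instead.
chat = Langchain::Conversation.new(llm: llm, memory_strategy: :summarize)
chat.set_context("You are a concise assistant")

chat.message("What is a vector database?")
chat.messages # the reader added above; delegates to @memory.messages
```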
data/lib/langchain/conversation_memory.rb ADDED
@@ -0,0 +1,88 @@
+# frozen_string_literal: true
+
+module Langchain
+  class ConversationMemory
+    attr_reader :examples, :messages
+
+    # The least number of tokens we want to be under the limit by
+    TOKEN_LEEWAY = 20
+
+    def initialize(llm:, messages: [], **options)
+      @llm = llm
+      @context = nil
+      @summary = nil
+      @examples = []
+      @messages = messages
+      @strategy = options.delete(:strategy) || :truncate
+      @options = options
+    end
+
+    def set_context(message)
+      @context = message
+    end
+
+    def add_examples(examples)
+      @examples.concat examples
+    end
+
+    def append_ai_message(message)
+      @messages << {role: "ai", content: message}
+    end
+
+    def append_user_message(message)
+      @messages << {role: "user", content: message}
+    end
+
+    def reduce_messages(exception)
+      case @strategy
+      when :truncate
+        truncate_messages(exception)
+      when :summarize
+        summarize_messages
+      else
+        raise "Unknown strategy: #{@strategy}"
+      end
+    end
+
+    def context
+      return if @context.nil? && @summary.nil?
+
+      [@context, @summary].compact.join("\n")
+    end
+
+    private
+
+    def truncate_messages(exception)
+      raise exception if @messages.size == 1
+
+      token_overflow = exception.token_overflow
+
+      @messages = @messages.drop_while do |message|
+        proceed = token_overflow > -TOKEN_LEEWAY
+        token_overflow -= token_length(message.to_json, model_name, llm: @llm)
+
+        proceed
+      end
+    end
+
+    def summarize_messages
+      history = [@summary, @messages.to_json].compact.join("\n")
+      partitions = [history[0, history.size / 2], history[history.size / 2, history.size]]
+
+      @summary = partitions.map { |messages| @llm.summarize(text: messages.to_json) }.join("\n")
+
+      @messages = [@messages.last]
+    end
+
+    def partition_messages
+    end
+
+    def model_name
+      @llm.class::DEFAULTS[:chat_completion_model_name]
+    end
+
+    def token_length(content, model_name, options)
+      @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
+    end
+  end
+end
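The memory object can also be exercised on its own; note that the `context` reader merges the persona with any running summary once the :summarize strategy has fired. A small sketch, reusing the `llm` instance from the previous example:

```ruby
memory = Langchain::ConversationMemory.new(llm: llm, strategy: :truncate)
memory.set_context("You are a helpful assistant")
memory.append_user_message("Hello")
memory.append_ai_message("Hi! How can I help?")

memory.messages # => [{role: "user", content: "Hello"}, {role: "ai", content: "Hi! How can I help?"}]
memory.context  # => "You are a helpful assistant" (joined with @summary once one exists)
```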
data/lib/langchain/llm/base.rb CHANGED
@@ -1,6 +1,8 @@
 # frozen_string_literal: true
 
 module Langchain::LLM
+  class ApiError < StandardError; end
+
   # A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
   #
   # Langchain.rb provides a common interface to interact with all supported LLMs:
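With a dedicated `ApiError` class, callers can rescue LLM failures by type instead of pattern-matching generic RuntimeError messages. A sketch, assuming the `openai` client from the example file above:

```ruby
begin
  openai.chat(prompt: "Hello")
rescue Langchain::LLM::ApiError => e
  # raised e.g. by OpenAI#chat below when the API returns an "error" payload
  Langchain.logger.warn("LLM call failed: #{e.message}")
end
```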
data/lib/langchain/llm/cohere.rb CHANGED
@@ -5,7 +5,7 @@ module Langchain::LLM
   # Wrapper around the Cohere API.
   #
   # Gem requirements:
-  #     gem "cohere-ruby", "~> 0.9.4"
+  #     gem "cohere-ruby", "~> 0.9.5"
   #
   # Usage:
   #     cohere = Langchain::LLM::Cohere.new(api_key: "YOUR_API_KEY")
data/lib/langchain/llm/openai.rb CHANGED
@@ -125,7 +125,7 @@ module Langchain::LLM
 
       response = client.chat(parameters: parameters)
 
-      raise "Chat completion failed: #{response}" if !response.empty? && response.dig("error")
+      raise Langchain::LLM::ApiError.new "Chat completion failed: #{response.dig("error", "message")}" if !response.empty? && response.dig("error")
 
       unless streaming
         response.dig("choices", 0, "message", "content")
data/lib/langchain/loader.rb CHANGED
@@ -51,6 +51,13 @@ module Langchain
       !!(@path =~ URI_REGEX)
     end
 
+    # Is the path a directory?
+    #
+    # @return [Boolean] true if path is a directory
+    def directory?
+      File.directory?(@path)
+    end
+
     # Load data from a file or URL
     #
     #     loader = Langchain::Loader.new("README.md")
@@ -69,15 +76,10 @@ module Langchain
     #
     # @return [Data] data that was loaded
     def load(&block)
-      @raw_data = url? ? load_from_url : load_from_path
+      return process_data(load_from_url, &block) if url?
+      return load_from_directory(&block) if directory?
 
-      data = if block
-        yield @raw_data.read, @options
-      else
-        processor_klass.new(@options).parse(@raw_data)
-      end
-
-      Langchain::Data.new(data, source: @path)
+      process_data(load_from_path, &block)
     end
 
     private
@@ -92,6 +94,27 @@ module Langchain
       File.open(@path)
     end
 
+    def load_from_directory(&block)
+      Dir.glob(File.join(@path, "**/*")).map do |file|
+        # Only load and add to result files with supported extensions
+        Langchain::Loader.new(file, @options).load(&block)
+      rescue UnknownFormatError
+        nil
+      end.flatten.compact
+    end
+
+    def process_data(data, &block)
+      @raw_data = data
+
+      result = if block
+        yield @raw_data.read, @options
+      else
+        processor_klass.new(@options).parse(@raw_data)
+      end
+
+      Langchain::Data.new(result)
+    end
+
     def processor_klass
       raise UnknownFormatError unless (kind = find_processor)
 
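With `directory?` and `load_from_directory` in place, one `Loader` call can now walk a directory tree, skipping files whose extension has no processor instead of aborting. A minimal sketch, assuming a local `docs/` folder:

```ruby
# Returns an array of Langchain::Data, one per supported file found recursively.
data = Langchain::Loader.new("docs/").load

# Single files and URLs still flow through the same entry point via process_data.
readme = Langchain::Loader.new("README.md").load
```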
data/lib/langchain/utils/token_length/google_palm_validator.rb CHANGED
@@ -37,6 +37,9 @@ module Langchain
       #
       def self.token_length(text, model_name = "chat-bison-001", options)
         response = options[:llm].client.count_message_tokens(model: model_name, prompt: text)
+
+        raise Langchain::LLM::ApiError.new(response["error"]["message"]) unless response["error"].nil?
+
         response.dig("tokenCount")
       end
 
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -98,6 +98,11 @@ module Langchain::Vectorsearch
       @llm = llm
     end
 
+    # Method supported by Vectorsearch DB to retrieve a default schema
+    def get_default_schema
+      raise NotImplementedError, "#{self.class.name} does not support retrieving a default schema"
+    end
+
     # Method supported by Vectorsearch DB to create a default schema
     def create_default_schema
       raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
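Adapters that don't override the new method inherit this base implementation, so an unsupported schema lookup fails loudly rather than silently. A sketch of the calling pattern, assuming a running Chroma server and an `llm` instance as above:

```ruby
store = Langchain::Vectorsearch::Chroma.new(url: ENV["CHROMA_URL"], index_name: "documents", llm: llm)

begin
  store.get_default_schema
rescue NotImplementedError => e
  Langchain.logger.warn(e.message) # this adapter exposes no schema API
end
```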
data/lib/langchain/vectorsearch/chroma.rb CHANGED
@@ -67,10 +67,17 @@ module Langchain::Vectorsearch
       ::Chroma::Resources::Collection.create(index_name)
     end
 
-    # TODO: Uncomment and add the spec
-    # def destroy_default_schema
-    #   ::Chroma::Resources::Collection.delete(index_name)
-    # end
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      ::Chroma::Resources::Collection.get(index_name)
+    end
+
+    # Delete the default schema
+    # @return [Hash] The response from the server
+    def destroy_default_schema
+      ::Chroma::Resources::Collection.delete(index_name)
+    end
 
     # Search for similar texts
     # @param query [String] The text to search for
data/lib/langchain/vectorsearch/hnswlib.rb CHANGED
@@ -10,8 +10,7 @@ module Langchain::Vectorsearch
   #     gem "hnswlib", "~> 0.8.1"
   #
   # Usage:
-  #     hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, url:, index_name:)
-  #
+  #     hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
 
     attr_reader :client, :path_to_index
data/lib/langchain/vectorsearch/milvus.rb CHANGED
@@ -79,7 +79,17 @@ module Langchain::Vectorsearch
       )
     end
 
-    # TODO: Add destroy_default_schema method
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.collections.get(collection_name: index_name)
+    end
+
+    # Delete default schema
+    # @return [Hash] The response from the server
+    def destroy_default_schema
+      client.collections.delete(collection_name: index_name)
+    end
 
     def similarity_search(query:, k: 4)
       embedding = llm.embed(text: query)
data/lib/langchain/vectorsearch/pinecone.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
       client.delete_index(index_name)
     end
 
+    # Get the default schema
+    # @return [Pinecone::Vector] The default schema
+    def get_default_schema
+      index
+    end
+
     # Search for similar texts
     # @param query [String] The text to search for
     # @param k [Integer] The number of results to return
data/lib/langchain/vectorsearch/qdrant.rb CHANGED
@@ -32,11 +32,12 @@ module Langchain::Vectorsearch
     # Add a list of texts to the index
     # @param texts [Array] The list of texts to add
     # @return [Hash] The response from the server
-    def add_texts(texts:, ids:)
+    def add_texts(texts:, ids: [])
       batch = {ids: [], vectors: [], payloads: []}
 
       Array(texts).each_with_index do |text, i|
-        batch[:ids].push(ids[i] || SecureRandom.uuid)
+        id = ids[i] || SecureRandom.uuid
+        batch[:ids].push(id)
         batch[:vectors].push(llm.embed(text: text))
         batch[:payloads].push({content: text})
       end
@@ -51,6 +52,12 @@ module Langchain::Vectorsearch
       add_texts(texts: texts, ids: ids)
     end
 
+    # Get the default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.collections.get(collection_name: index_name)
+    end
+
     # Deletes the default schema
     # @return [Hash] The response from the server
     def destroy_default_schema
@@ -109,7 +116,7 @@ module Langchain::Vectorsearch
     def ask(question:)
       search_results = similarity_search(query: question)
 
-      context = search_results.dig("result").map do |result|
+      context = search_results.map do |result|
         result.dig("payload").to_s
       end
       context = context.join("\n---\n")
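The `ids: []` default is the `Qdrant#add_texts` fix called out in the changelog: the keyword is no longer required, and any text without an explicit id gets a generated UUID. A sketch, assuming a reachable Qdrant instance and the `llm` from earlier:

```ruby
qdrant = Langchain::Vectorsearch::Qdrant.new(
  url: ENV["QDRANT_URL"],
  api_key: ENV["QDRANT_API_KEY"],
  index_name: "documents",
  llm: llm
)

# Before 0.6.4 this raised ArgumentError (missing keyword: ids);
# now each text is stored under a SecureRandom.uuid.
qdrant.add_texts(texts: ["first document", "second document"])
```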
data/lib/langchain/vectorsearch/weaviate.rb CHANGED
@@ -85,6 +85,12 @@ module Langchain::Vectorsearch
       )
     end
 
+    # Get default schema
+    # @return [Hash] The response from the server
+    def get_default_schema
+      client.schema.get(class_name: index_name)
+    end
+
     # Delete the index
     # @return [Boolean] Whether the index was deleted
     def destroy_default_schema
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.6.3"
+  VERSION = "0.6.4"
 end
data/lib/langchain.rb CHANGED
@@ -51,6 +51,7 @@ module Langchain
   autoload :Loader, "langchain/loader"
   autoload :Data, "langchain/data"
   autoload :Conversation, "langchain/conversation"
+  autoload :ConversationMemory, "langchain/conversation_memory"
   autoload :DependencyHelper, "langchain/dependency_helper"
   autoload :ContextualLogger, "langchain/contextual_logger"
 
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.3
+  version: 0.6.4
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-26 00:00:00.000000000 Z
+date: 2023-07-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -474,6 +474,7 @@ files:
 - LICENSE.txt
 - README.md
 - Rakefile
+- examples/conversation_with_openai.rb
 - examples/create_and_manage_few_shot_prompt_templates.rb
 - examples/create_and_manage_prompt_templates.rb
 - examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
@@ -494,6 +495,7 @@ files:
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
+- lib/langchain/conversation_memory.rb
 - lib/langchain/data.rb
 - lib/langchain/dependency_helper.rb
 - lib/langchain/llm/ai21.rb