langchainrb 0.6.2 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/README.md +41 -7
- data/examples/conversation_with_openai.rb +52 -0
- data/lib/langchain/active_record/hooks.rb +1 -1
- data/lib/langchain/conversation.rb +28 -36
- data/lib/langchain/conversation_memory.rb +88 -0
- data/lib/langchain/llm/base.rb +2 -0
- data/lib/langchain/llm/cohere.rb +1 -1
- data/lib/langchain/llm/openai.rb +1 -1
- data/lib/langchain/loader.rb +31 -8
- data/lib/langchain/utils/token_length/google_palm_validator.rb +3 -0
- data/lib/langchain/vectorsearch/base.rb +10 -0
- data/lib/langchain/vectorsearch/chroma.rb +12 -0
- data/lib/langchain/vectorsearch/hnswlib.rb +3 -2
- data/lib/langchain/vectorsearch/milvus.rb +14 -0
- data/lib/langchain/vectorsearch/pgvector.rb +2 -0
- data/lib/langchain/vectorsearch/pinecone.rb +12 -0
- data/lib/langchain/vectorsearch/qdrant.rb +16 -3
- data/lib/langchain/vectorsearch/weaviate.rb +13 -0
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -0
- metadata +4 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 78810f63a496c6b98208a9c838cbdae41a8c944879e68f16fc4362de90c49110
|
|
4
|
+
data.tar.gz: c95d357da62c8120a2a105a94b219ca1f3552f85fff30bb7cb3d40def336baeb
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: ee0c549ecebd98ce940b6dc05c8aa2783c265d7cb3903ca30448be0f906e89f353e419b2bb862178fe9081baa002b42fd7aaf88ec244a63beec9bc862e3a9410
|
|
7
|
+
data.tar.gz: a4b67c5b0d268d6b96622209fe3201c8585bf44d1d44dca0bc061de3f1ba1797e87df61111ddc6565b0d75b23a06677aa3bad6e41fbd4a119ff69f6b11e756ee
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.6.4] - 2023-07-01
|
|
4
|
+
- Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
|
|
5
|
+
- Introduce `ConversationMemory`
|
|
6
|
+
- Allow loading multiple files from a directory
|
|
7
|
+
- Add `get_default_schema()`, `create_default_schema()`, `destroy_default_schema()` missing methods to `Langchain::Vectorsearch::*` classes
|
|
8
|
+
|
|
9
|
+
## [0.6.3] - 2023-06-25
|
|
10
|
+
- Add #destroy_default_schema() to Langchain::Vectorsearch::* classes
|
|
11
|
+
|
|
3
12
|
## [0.6.2] - 2023-06-25
|
|
4
13
|
- Qdrant, Chroma, and Pinecone are supported by ActiveRecord hooks
|
|
5
14
|
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -110,6 +110,22 @@ client.ask(
|
|
|
110
110
|
)
|
|
111
111
|
```
|
|
112
112
|
|
|
113
|
+
## Integrating Vector Search into ActiveRecord models
|
|
114
|
+
```ruby
|
|
115
|
+
class Product < ActiveRecord::Base
|
|
116
|
+
vectorsearch provider: Langchain::Vectorsearch::Qdrant.new(
|
|
117
|
+
api_key: ENV["QDRANT_API_KEY"],
|
|
118
|
+
url: ENV["QDRANT_URL"],
|
|
119
|
+
index_name: "Products",
|
|
120
|
+
llm: Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
|
|
121
|
+
)
|
|
122
|
+
|
|
123
|
+
after_save :upsert_to_vectorsearch
|
|
124
|
+
end
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
Additional info [here](https://github.com/andreibondarev/langchainrb/blob/main/lib/langchain/active_record/hooks.rb#L10-L38).
|
|
128
|
+
|
|
113
129
|
### Using Standalone LLMs 🗣️
|
|
114
130
|
|
|
115
131
|
Add `gem "ruby-openai", "~> 4.0.0"` to your Gemfile.
|
|
@@ -370,15 +386,33 @@ Langchain.logger.level = :info
|
|
|
370
386
|
Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
|
|
371
387
|
|
|
372
388
|
## Core Contributors
|
|
373
|
-
[<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://
|
|
389
|
+
[<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://twitter.com/rushing_andrei)
|
|
374
390
|
|
|
375
|
-
##
|
|
376
|
-
[<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
|
|
377
|
-
[<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
|
|
378
|
-
[<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
|
|
391
|
+
## Contributors
|
|
379
392
|
[<img style="border-radius:50%" alt="Alex Chaplinsky" src="https://avatars.githubusercontent.com/u/695947?v=4" width="80" height="80" class="avatar">](https://github.com/alchaplinsky)
|
|
380
|
-
|
|
381
|
-
|
|
393
|
+
[<img style="border-radius:50%" alt="Josh Nichols" src="https://avatars.githubusercontent.com/u/159?v=4" width="80" height="80" class="avatar">](https://github.com/technicalpickles)
|
|
394
|
+
[<img style="border-radius:50%" alt="Matt Lindsey" src="https://avatars.githubusercontent.com/u/5638339?v=4" width="80" height="80" class="avatar">](https://github.com/mattlindsey)
|
|
395
|
+
[<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
|
|
396
|
+
[<img style="border-radius:50%" alt="Moeki Kawakami" src="https://avatars.githubusercontent.com/u/72325947?v=4" width="80" height="80" class="avatar">](https://github.com/moekidev)
|
|
397
|
+
[<img style="border-radius:50%" alt="Jens Stmrs" src="https://avatars.githubusercontent.com/u/3492669?v=4" width="80" height="80" class="avatar">](https://github.com/faustus7)
|
|
398
|
+
[<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
|
|
399
|
+
[<img style="border-radius:50%" alt="Piero Dotti" src="https://avatars.githubusercontent.com/u/5167659?v=4" width="80" height="80" class="avatar">](https://github.com/ProGM)
|
|
400
|
+
[<img style="border-radius:50%" alt="Michał Ciemięga" src="https://avatars.githubusercontent.com/u/389828?v=4" width="80" height="80" class="avatar">](https://github.com/zewelor)
|
|
401
|
+
[<img style="border-radius:50%" alt="Bruno Bornsztein" src="https://avatars.githubusercontent.com/u/3760?v=4" width="80" height="80" class="avatar">](https://github.com/bborn)
|
|
402
|
+
[<img style="border-radius:50%" alt="Tim Williams" src="https://avatars.githubusercontent.com/u/1192351?v=4" width="80" height="80" class="avatar">](https://github.com/timrwilliams)
|
|
403
|
+
[<img style="border-radius:50%" alt="Zhenhang Tung" src="https://avatars.githubusercontent.com/u/8170159?v=4" width="80" height="80" class="avatar">](https://github.com/ZhenhangTung)
|
|
404
|
+
[<img style="border-radius:50%" alt="Hama" src="https://avatars.githubusercontent.com/u/38002468?v=4" width="80" height="80" class="avatar">](https://github.com/akmhmgc)
|
|
405
|
+
[<img style="border-radius:50%" alt="Josh Weir" src="https://avatars.githubusercontent.com/u/10720337?v=4" width="80" height="80" class="avatar">](https://github.com/joshweir)
|
|
406
|
+
[<img style="border-radius:50%" alt="Arthur Hess" src="https://avatars.githubusercontent.com/u/446035?v=4" width="80" height="80" class="avatar">](https://github.com/arthurhess)
|
|
407
|
+
[<img style="border-radius:50%" alt="Jin Shen" src="https://avatars.githubusercontent.com/u/54917718?v=4" width="80" height="80" class="avatar">](https://github.com/jacshen-ebay)
|
|
408
|
+
[<img style="border-radius:50%" alt="Earle Bunao" src="https://avatars.githubusercontent.com/u/4653624?v=4" width="80" height="80" class="avatar">](https://github.com/erbunao)
|
|
409
|
+
[<img style="border-radius:50%" alt="Maël H." src="https://avatars.githubusercontent.com/u/61985678?v=4" width="80" height="80" class="avatar">](https://github.com/mael-ha)
|
|
410
|
+
[<img style="border-radius:50%" alt="Chris O. Adebiyi" src="https://avatars.githubusercontent.com/u/62605573?v=4" width="80" height="80" class="avatar">](https://github.com/oluvvafemi)
|
|
411
|
+
[<img style="border-radius:50%" alt="Aaron Breckenridge" src="https://avatars.githubusercontent.com/u/201360?v=4" width="80" height="80" class="avatar">](https://github.com/breckenedge)
|
|
412
|
+
|
|
413
|
+
## Star History
|
|
414
|
+
|
|
415
|
+
[![Star History Chart](https://api.star-history.com/svg?repos=andreibondarev/langchainrb&type=Date)](https://star-history.com/#andreibondarev/langchainrb&Date)
|
|
382
416
|
|
|
383
417
|
## Contributing
|
|
384
418
|
|
data/examples/conversation_with_openai.rb
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require "langchain"
|
|
2
|
+
require "reline"
|
|
3
|
+
|
|
4
|
+
# gem install reline
|
|
5
|
+
# or add `gem "reline"` to your Gemfile
|
|
6
|
+
|
|
7
|
+
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
|
8
|
+
|
|
9
|
+
chat = Langchain::Conversation.new(llm: openai)
|
|
10
|
+
chat.set_context("You are a chatbot from the future")
|
|
11
|
+
|
|
12
|
+
DONE = %w[done end eof exit].freeze
|
|
13
|
+
|
|
14
|
+
puts "Welcome to the chatbot from the future!"
|
|
15
|
+
|
|
16
|
+
def prompt_for_message
|
|
17
|
+
puts "(multiline input; type 'end' on its own line when done. or exit to exit)"
|
|
18
|
+
|
|
19
|
+
user_message = Reline.readmultiline("Question: ", true) do |multiline_input|
|
|
20
|
+
last = multiline_input.split.last
|
|
21
|
+
DONE.include?(last)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
return :noop unless user_message
|
|
25
|
+
|
|
26
|
+
lines = user_message.split("\n")
|
|
27
|
+
if lines.size > 1 && DONE.include?(lines.last)
|
|
28
|
+
# remove the "done" from the message
|
|
29
|
+
user_message = lines[0..-2].join("\n")
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
return :exit if DONE.include?(user_message.downcase)
|
|
33
|
+
|
|
34
|
+
user_message
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
begin
|
|
38
|
+
loop do
|
|
39
|
+
user_message = prompt_for_message
|
|
40
|
+
|
|
41
|
+
case user_message
|
|
42
|
+
when :noop
|
|
43
|
+
next
|
|
44
|
+
when :exit
|
|
45
|
+
break
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
puts chat.message(user_message)
|
|
49
|
+
end
|
|
50
|
+
rescue Interrupt
|
|
51
|
+
exit 0
|
|
52
|
+
end
|
data/lib/langchain/active_record/hooks.rb
CHANGED
|
@@ -35,7 +35,7 @@ module Langchain
|
|
|
35
35
|
# Query the vector search provider
|
|
36
36
|
# Recipe.similarity_search("carnivore dish")
|
|
37
37
|
# Delete the default schema to start over
|
|
38
|
-
# Recipe.class_variable_get(:@@provider).
|
|
38
|
+
# Recipe.class_variable_get(:@@provider).destroy_default_schema
|
|
39
39
|
#
|
|
40
40
|
module Hooks
|
|
41
41
|
def self.included(base)
|
data/lib/langchain/conversation.rb
CHANGED
|
@@ -17,10 +17,7 @@ module Langchain
|
|
|
17
17
|
# end
|
|
18
18
|
#
|
|
19
19
|
class Conversation
|
|
20
|
-
attr_reader :
|
|
21
|
-
|
|
22
|
-
# The least number of tokens we want to be under the limit by
|
|
23
|
-
TOKEN_LEEWAY = 20
|
|
20
|
+
attr_reader :options
|
|
24
21
|
|
|
25
22
|
# Intialize Conversation with a LLM
|
|
26
23
|
#
|
|
@@ -31,7 +28,11 @@ module Langchain
|
|
|
31
28
|
@llm = llm
|
|
32
29
|
@context = nil
|
|
33
30
|
@examples = []
|
|
34
|
-
@
|
|
31
|
+
@memory = ConversationMemory.new(
|
|
32
|
+
llm: llm,
|
|
33
|
+
messages: options.delete(:messages) || [],
|
|
34
|
+
strategy: options.delete(:memory_strategy)
|
|
35
|
+
)
|
|
35
36
|
@options = options
|
|
36
37
|
@block = block
|
|
37
38
|
end
|
|
@@ -39,59 +40,50 @@ module Langchain
|
|
|
39
40
|
# Set the context of the conversation. Usually used to set the model's persona.
|
|
40
41
|
# @param message [String] The context of the conversation
|
|
41
42
|
def set_context(message)
|
|
42
|
-
@
|
|
43
|
+
@memory.set_context message
|
|
43
44
|
end
|
|
44
45
|
|
|
45
46
|
# Add examples to the conversation. Used to give the model a sense of the conversation.
|
|
46
47
|
# @param examples [Array<Hash>] The examples to add to the conversation
|
|
47
48
|
def add_examples(examples)
|
|
48
|
-
@
|
|
49
|
+
@memory.add_examples examples
|
|
49
50
|
end
|
|
50
51
|
|
|
51
52
|
# Message the model with a prompt and return the response.
|
|
52
53
|
# @param message [String] The prompt to message the model with
|
|
53
54
|
# @return [String] The response from the model
|
|
54
55
|
def message(message)
|
|
55
|
-
append_user_message(message)
|
|
56
|
+
@memory.append_user_message(message)
|
|
56
57
|
response = llm_response(message)
|
|
57
|
-
append_ai_message(response)
|
|
58
|
+
@memory.append_ai_message(response)
|
|
58
59
|
response
|
|
59
60
|
end
|
|
60
61
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
def
|
|
64
|
-
@
|
|
65
|
-
rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
|
|
66
|
-
raise exception if @messages.size == 1
|
|
67
|
-
|
|
68
|
-
reduce_messages(exception.token_overflow)
|
|
69
|
-
retry
|
|
62
|
+
# Messages from conversation memory
|
|
63
|
+
# @return [Array<Hash>] The messages from the conversation memory
|
|
64
|
+
def messages
|
|
65
|
+
@memory.messages
|
|
70
66
|
end
|
|
71
67
|
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
proceed
|
|
78
|
-
end
|
|
68
|
+
# Context from conversation memory
|
|
69
|
+
# @return [String] Context from conversation memory
|
|
70
|
+
def context
|
|
71
|
+
@memory.context
|
|
79
72
|
end
|
|
80
73
|
|
|
81
|
-
|
|
82
|
-
|
|
74
|
+
# Examples from conversation memory
|
|
75
|
+
# @return [Array<Hash>] Examples from the conversation memory
|
|
76
|
+
def examples
|
|
77
|
+
@memory.examples
|
|
83
78
|
end
|
|
84
79
|
|
|
85
|
-
|
|
86
|
-
@messages << {role: "user", content: message}
|
|
87
|
-
end
|
|
88
|
-
|
|
89
|
-
def model_name
|
|
90
|
-
@options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
|
|
91
|
-
end
|
|
80
|
+
private
|
|
92
81
|
|
|
93
|
-
def
|
|
94
|
-
@llm.
|
|
82
|
+
def llm_response(prompt)
|
|
83
|
+
@llm.chat(messages: @memory.messages, context: @memory.context, examples: @memory.examples, **@options, &@block)
|
|
84
|
+
rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
|
|
85
|
+
@memory.reduce_messages(exception)
|
|
86
|
+
retry
|
|
95
87
|
end
|
|
96
88
|
end
|
|
97
89
|
end
|
data/lib/langchain/conversation_memory.rb
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Langchain
|
|
4
|
+
class ConversationMemory
|
|
5
|
+
attr_reader :examples, :messages
|
|
6
|
+
|
|
7
|
+
# The least number of tokens we want to be under the limit by
|
|
8
|
+
TOKEN_LEEWAY = 20
|
|
9
|
+
|
|
10
|
+
def initialize(llm:, messages: [], **options)
|
|
11
|
+
@llm = llm
|
|
12
|
+
@context = nil
|
|
13
|
+
@summary = nil
|
|
14
|
+
@examples = []
|
|
15
|
+
@messages = messages
|
|
16
|
+
@strategy = options.delete(:strategy) || :truncate
|
|
17
|
+
@options = options
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def set_context(message)
|
|
21
|
+
@context = message
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
def add_examples(examples)
|
|
25
|
+
@examples.concat examples
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
def append_ai_message(message)
|
|
29
|
+
@messages << {role: "ai", content: message}
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def append_user_message(message)
|
|
33
|
+
@messages << {role: "user", content: message}
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def reduce_messages(exception)
|
|
37
|
+
case @strategy
|
|
38
|
+
when :truncate
|
|
39
|
+
truncate_messages(exception)
|
|
40
|
+
when :summarize
|
|
41
|
+
summarize_messages
|
|
42
|
+
else
|
|
43
|
+
raise "Unknown strategy: #{@options[:strategy]}"
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def context
|
|
48
|
+
return if @context.nil? && @summary.nil?
|
|
49
|
+
|
|
50
|
+
[@context, @summary].compact.join("\n")
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
private
|
|
54
|
+
|
|
55
|
+
def truncate_messages(exception)
|
|
56
|
+
raise exception if @messages.size == 1
|
|
57
|
+
|
|
58
|
+
token_overflow = exception.token_overflow
|
|
59
|
+
|
|
60
|
+
@messages = @messages.drop_while do |message|
|
|
61
|
+
proceed = token_overflow > -TOKEN_LEEWAY
|
|
62
|
+
token_overflow -= token_length(message.to_json, model_name, llm: @llm)
|
|
63
|
+
|
|
64
|
+
proceed
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
def summarize_messages
|
|
69
|
+
history = [@summary, @messages.to_json].compact.join("\n")
|
|
70
|
+
partitions = [history[0, history.size / 2], history[history.size / 2, history.size]]
|
|
71
|
+
|
|
72
|
+
@summary = partitions.map { |messages| @llm.summarize(text: messages.to_json) }.join("\n")
|
|
73
|
+
|
|
74
|
+
@messages = [@messages.last]
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
def partition_messages
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def model_name
|
|
81
|
+
@llm.class::DEFAULTS[:chat_completion_model_name]
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def token_length(content, model_name, options)
|
|
85
|
+
@llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
data/lib/langchain/llm/base.rb
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Langchain::LLM
|
|
4
|
+
class ApiError < StandardError; end
|
|
5
|
+
|
|
4
6
|
# A LLM is a language model consisting of a neural network with many parameters (typically billions of weights or more), trained on large quantities of unlabeled text using self-supervised learning or semi-supervised learning.
|
|
5
7
|
#
|
|
6
8
|
# Langchain.rb provides a common interface to interact with all supported LLMs:
|
data/lib/langchain/llm/cohere.rb
CHANGED
data/lib/langchain/llm/openai.rb
CHANGED
|
@@ -125,7 +125,7 @@ module Langchain::LLM
|
|
|
125
125
|
|
|
126
126
|
response = client.chat(parameters: parameters)
|
|
127
127
|
|
|
128
|
-
raise "Chat completion failed: #{response}" if !response.empty? && response.dig("error")
|
|
128
|
+
raise Langchain::LLM::ApiError.new "Chat completion failed: #{response.dig("error", "message")}" if !response.empty? && response.dig("error")
|
|
129
129
|
|
|
130
130
|
unless streaming
|
|
131
131
|
response.dig("choices", 0, "message", "content")
|
data/lib/langchain/loader.rb
CHANGED
|
@@ -51,6 +51,13 @@ module Langchain
|
|
|
51
51
|
!!(@path =~ URI_REGEX)
|
|
52
52
|
end
|
|
53
53
|
|
|
54
|
+
# Is the path a directory
|
|
55
|
+
#
|
|
56
|
+
# @return [Boolean] true if path is a directory
|
|
57
|
+
def directory?
|
|
58
|
+
File.directory?(@path)
|
|
59
|
+
end
|
|
60
|
+
|
|
54
61
|
# Load data from a file or URL
|
|
55
62
|
#
|
|
56
63
|
# loader = Langchain::Loader.new("README.md")
|
|
@@ -69,15 +76,10 @@ module Langchain
|
|
|
69
76
|
#
|
|
70
77
|
# @return [Data] data that was loaded
|
|
71
78
|
def load(&block)
|
|
72
|
-
|
|
79
|
+
return process_data(load_from_url, &block) if url?
|
|
80
|
+
return load_from_directory(&block) if directory?
|
|
73
81
|
|
|
74
|
-
|
|
75
|
-
yield @raw_data.read, @options
|
|
76
|
-
else
|
|
77
|
-
processor_klass.new(@options).parse(@raw_data)
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
Langchain::Data.new(data, source: @path)
|
|
82
|
+
process_data(load_from_path, &block)
|
|
81
83
|
end
|
|
82
84
|
|
|
83
85
|
private
|
|
@@ -92,6 +94,27 @@ module Langchain
|
|
|
92
94
|
File.open(@path)
|
|
93
95
|
end
|
|
94
96
|
|
|
97
|
+
def load_from_directory(&block)
|
|
98
|
+
Dir.glob(File.join(@path, "**/*")).map do |file|
|
|
99
|
+
# Only load and add to result files with supported extensions
|
|
100
|
+
Langchain::Loader.new(file, @options).load(&block)
|
|
101
|
+
rescue
|
|
102
|
+
UnknownFormatError nil
|
|
103
|
+
end.flatten.compact
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
def process_data(data, &block)
|
|
107
|
+
@raw_data = data
|
|
108
|
+
|
|
109
|
+
result = if block
|
|
110
|
+
yield @raw_data.read, @options
|
|
111
|
+
else
|
|
112
|
+
processor_klass.new(@options).parse(@raw_data)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
Langchain::Data.new(result)
|
|
116
|
+
end
|
|
117
|
+
|
|
95
118
|
def processor_klass
|
|
96
119
|
raise UnknownFormatError unless (kind = find_processor)
|
|
97
120
|
|
data/lib/langchain/utils/token_length/google_palm_validator.rb
CHANGED
|
@@ -37,6 +37,9 @@ module Langchain
|
|
|
37
37
|
#
|
|
38
38
|
def self.token_length(text, model_name = "chat-bison-001", options)
|
|
39
39
|
response = options[:llm].client.count_message_tokens(model: model_name, prompt: text)
|
|
40
|
+
|
|
41
|
+
raise Langchain::LLM::ApiError.new(response["error"]["message"]) unless response["error"].nil?
|
|
42
|
+
|
|
40
43
|
response.dig("tokenCount")
|
|
41
44
|
end
|
|
42
45
|
|
data/lib/langchain/vectorsearch/base.rb
CHANGED
|
@@ -98,11 +98,21 @@ module Langchain::Vectorsearch
|
|
|
98
98
|
@llm = llm
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
+
# Method supported by Vectorsearch DB to retrieve a default schema
|
|
102
|
+
def get_default_schema
|
|
103
|
+
raise NotImplementedError, "#{self.class.name} does not support retrieving a default schema"
|
|
104
|
+
end
|
|
105
|
+
|
|
101
106
|
# Method supported by Vectorsearch DB to create a default schema
|
|
102
107
|
def create_default_schema
|
|
103
108
|
raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
|
|
104
109
|
end
|
|
105
110
|
|
|
111
|
+
# Method supported by Vectorsearch DB to delete the default schema
|
|
112
|
+
def destroy_default_schema
|
|
113
|
+
raise NotImplementedError, "#{self.class.name} does not support deleting a default schema"
|
|
114
|
+
end
|
|
115
|
+
|
|
106
116
|
# Method supported by Vectorsearch DB to add a list of texts to the index
|
|
107
117
|
def add_texts(...)
|
|
108
118
|
raise NotImplementedError, "#{self.class.name} does not support adding texts"
|
data/lib/langchain/vectorsearch/chroma.rb
CHANGED
|
@@ -67,6 +67,18 @@ module Langchain::Vectorsearch
|
|
|
67
67
|
::Chroma::Resources::Collection.create(index_name)
|
|
68
68
|
end
|
|
69
69
|
|
|
70
|
+
# Get the default schema
|
|
71
|
+
# @return [Hash] The response from the server
|
|
72
|
+
def get_default_schema
|
|
73
|
+
::Chroma::Resources::Collection.get(index_name)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
# Delete the default schema
|
|
77
|
+
# @return [Hash] The response from the server
|
|
78
|
+
def destroy_default_schema
|
|
79
|
+
::Chroma::Resources::Collection.delete(index_name)
|
|
80
|
+
end
|
|
81
|
+
|
|
70
82
|
# Search for similar texts
|
|
71
83
|
# @param query [String] The text to search for
|
|
72
84
|
# @param k [Integer] The number of results to return
|
data/lib/langchain/vectorsearch/hnswlib.rb
CHANGED
|
@@ -10,8 +10,7 @@ module Langchain::Vectorsearch
|
|
|
10
10
|
# gem "hnswlib", "~> 0.8.1"
|
|
11
11
|
#
|
|
12
12
|
# Usage:
|
|
13
|
-
# hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:,
|
|
14
|
-
#
|
|
13
|
+
# hnsw = Langchain::Vectorsearch::Hnswlib.new(llm:, path_to_index:)
|
|
15
14
|
|
|
16
15
|
attr_reader :client, :path_to_index
|
|
17
16
|
|
|
@@ -53,6 +52,8 @@ module Langchain::Vectorsearch
|
|
|
53
52
|
client.save_index(path_to_index)
|
|
54
53
|
end
|
|
55
54
|
|
|
55
|
+
# TODO: Add update_texts method
|
|
56
|
+
|
|
56
57
|
#
|
|
57
58
|
# Search for similar texts
|
|
58
59
|
#
|
data/lib/langchain/vectorsearch/milvus.rb
CHANGED
|
@@ -39,6 +39,8 @@ module Langchain::Vectorsearch
|
|
|
39
39
|
)
|
|
40
40
|
end
|
|
41
41
|
|
|
42
|
+
# TODO: Add update_texts method
|
|
43
|
+
|
|
42
44
|
# Create default schema
|
|
43
45
|
# @return [Hash] The response from the server
|
|
44
46
|
def create_default_schema
|
|
@@ -77,6 +79,18 @@ module Langchain::Vectorsearch
|
|
|
77
79
|
)
|
|
78
80
|
end
|
|
79
81
|
|
|
82
|
+
# Get the default schema
|
|
83
|
+
# @return [Hash] The response from the server
|
|
84
|
+
def get_default_schema
|
|
85
|
+
client.collections.get(collection_name: index_name)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Delete default schema
|
|
89
|
+
# @return [Hash] The response from the server
|
|
90
|
+
def destroy_default_schema
|
|
91
|
+
client.collections.delete(collection_name: index_name)
|
|
92
|
+
end
|
|
93
|
+
|
|
80
94
|
def similarity_search(query:, k: 4)
|
|
81
95
|
embedding = llm.embed(text: query)
|
|
82
96
|
|
data/lib/langchain/vectorsearch/pinecone.rb
CHANGED
|
@@ -79,6 +79,18 @@ module Langchain::Vectorsearch
|
|
|
79
79
|
)
|
|
80
80
|
end
|
|
81
81
|
|
|
82
|
+
# Delete the index
|
|
83
|
+
# @return [Hash] The response from the server
|
|
84
|
+
def destroy_default_schema
|
|
85
|
+
client.delete_index(index_name)
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Get the default schema
|
|
89
|
+
# @return [Pinecone::Vector] The default schema
|
|
90
|
+
def get_default_schema
|
|
91
|
+
index
|
|
92
|
+
end
|
|
93
|
+
|
|
82
94
|
# Search for similar texts
|
|
83
95
|
# @param query [String] The text to search for
|
|
84
96
|
# @param k [Integer] The number of results to return
|
data/lib/langchain/vectorsearch/qdrant.rb
CHANGED
|
@@ -32,11 +32,12 @@ module Langchain::Vectorsearch
|
|
|
32
32
|
# Add a list of texts to the index
|
|
33
33
|
# @param texts [Array] The list of texts to add
|
|
34
34
|
# @return [Hash] The response from the server
|
|
35
|
-
def add_texts(texts:, ids:)
|
|
35
|
+
def add_texts(texts:, ids: [])
|
|
36
36
|
batch = {ids: [], vectors: [], payloads: []}
|
|
37
37
|
|
|
38
38
|
Array(texts).each_with_index do |text, i|
|
|
39
|
-
|
|
39
|
+
id = ids[i] || SecureRandom.uuid
|
|
40
|
+
batch[:ids].push(id)
|
|
40
41
|
batch[:vectors].push(llm.embed(text: text))
|
|
41
42
|
batch[:payloads].push({content: text})
|
|
42
43
|
end
|
|
@@ -51,6 +52,18 @@ module Langchain::Vectorsearch
|
|
|
51
52
|
add_texts(texts: texts, ids: ids)
|
|
52
53
|
end
|
|
53
54
|
|
|
55
|
+
# Get the default schema
|
|
56
|
+
# @return [Hash] The response from the server
|
|
57
|
+
def get_default_schema
|
|
58
|
+
client.collections.get(collection_name: index_name)
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Deletes the default schema
|
|
62
|
+
# @return [Hash] The response from the server
|
|
63
|
+
def destroy_default_schema
|
|
64
|
+
client.collections.delete(collection_name: index_name)
|
|
65
|
+
end
|
|
66
|
+
|
|
54
67
|
# Create the index with the default schema
|
|
55
68
|
# @return [Hash] The response from the server
|
|
56
69
|
def create_default_schema
|
|
@@ -103,7 +116,7 @@ module Langchain::Vectorsearch
|
|
|
103
116
|
def ask(question:)
|
|
104
117
|
search_results = similarity_search(query: question)
|
|
105
118
|
|
|
106
|
-
context = search_results.
|
|
119
|
+
context = search_results.map do |result|
|
|
107
120
|
result.dig("payload").to_s
|
|
108
121
|
end
|
|
109
122
|
context = context.join("\n---\n")
|
data/lib/langchain/vectorsearch/weaviate.rb
CHANGED
|
@@ -72,6 +72,7 @@ module Langchain::Vectorsearch
|
|
|
72
72
|
end
|
|
73
73
|
|
|
74
74
|
# Create default schema
|
|
75
|
+
# @return [Hash] The response from the server
|
|
75
76
|
def create_default_schema
|
|
76
77
|
client.schema.create(
|
|
77
78
|
class_name: index_name,
|
|
@@ -84,6 +85,18 @@ module Langchain::Vectorsearch
|
|
|
84
85
|
)
|
|
85
86
|
end
|
|
86
87
|
|
|
88
|
+
# Get default schema
|
|
89
|
+
# @return [Hash] The response from the server
|
|
90
|
+
def get_default_schema
|
|
91
|
+
client.schema.get(class_name: index_name)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Delete the index
|
|
95
|
+
# @return [Boolean] Whether the index was deleted
|
|
96
|
+
def destroy_default_schema
|
|
97
|
+
client.schema.delete(class_name: index_name)
|
|
98
|
+
end
|
|
99
|
+
|
|
87
100
|
# Return documents similar to the query
|
|
88
101
|
# @param query [String] The query to search for
|
|
89
102
|
# @param k [Integer|String] The number of results to return
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
|
@@ -51,6 +51,7 @@ module Langchain
|
|
|
51
51
|
autoload :Loader, "langchain/loader"
|
|
52
52
|
autoload :Data, "langchain/data"
|
|
53
53
|
autoload :Conversation, "langchain/conversation"
|
|
54
|
+
autoload :ConversationMemory, "langchain/conversation_memory"
|
|
54
55
|
autoload :DependencyHelper, "langchain/dependency_helper"
|
|
55
56
|
autoload :ContextualLogger, "langchain/contextual_logger"
|
|
56
57
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: langchainrb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.2
|
|
4
|
+
version: 0.6.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrei Bondarev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-06-25 00:00:00.000000000 Z
|
|
11
|
+
date: 2023-07-01 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: baran
|
|
@@ -474,6 +474,7 @@ files:
|
|
|
474
474
|
- LICENSE.txt
|
|
475
475
|
- README.md
|
|
476
476
|
- Rakefile
|
|
477
|
+
- examples/conversation_with_openai.rb
|
|
477
478
|
- examples/create_and_manage_few_shot_prompt_templates.rb
|
|
478
479
|
- examples/create_and_manage_prompt_templates.rb
|
|
479
480
|
- examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
|
|
@@ -494,6 +495,7 @@ files:
|
|
|
494
495
|
- lib/langchain/chunker/text.rb
|
|
495
496
|
- lib/langchain/contextual_logger.rb
|
|
496
497
|
- lib/langchain/conversation.rb
|
|
498
|
+
- lib/langchain/conversation_memory.rb
|
|
497
499
|
- lib/langchain/data.rb
|
|
498
500
|
- lib/langchain/dependency_helper.rb
|
|
499
501
|
- lib/langchain/llm/ai21.rb
|