langchainrb 0.6.8 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d046a7cae545da3092694fee60b0fa3caa0852492c9bf7dbbe35001d96106e4
|
4
|
+
data.tar.gz: 8ed841d2359400f9a4700a49b82030fbe3dd3563545eca31b487914e8fb236f7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: eed2f03cbdd74ef42fafd07524dc6bf5c7ac2278e5e55dd1aad5b2cd87f319a91c8719a6e9ac43ac50cda5525f3defab14cfecc749bdec81075d52b3935b23dd
|
7
|
+
data.tar.gz: cd4fce88b0f9f545f6d32660879e6fa5044ec74b41dcb41bc29eafb78b307d8b2cb8204856098657d8c9707331c8629c8a707a227fe4d20a5816347f8a66991c
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.6.8)
|
4
|
+
langchainrb (0.6.9)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
7
|
json-schema (~> 4.0.0)
|
@@ -34,7 +34,7 @@ GEM
|
|
34
34
|
afm (0.2.2)
|
35
35
|
ai21 (0.2.1)
|
36
36
|
ast (2.4.2)
|
37
|
-
baran (0.1.
|
37
|
+
baran (0.1.7)
|
38
38
|
builder (3.2.4)
|
39
39
|
byebug (11.1.3)
|
40
40
|
childprocess (4.1.0)
|
@@ -189,7 +189,7 @@ GEM
|
|
189
189
|
ruby-rc4
|
190
190
|
ttfunk
|
191
191
|
pg (1.5.3)
|
192
|
-
pgvector (0.2.
|
192
|
+
pgvector (0.2.1)
|
193
193
|
pinecone (0.1.71)
|
194
194
|
dry-struct (~> 1.6.0)
|
195
195
|
dry-validation (~> 1.10.0)
|
@@ -334,7 +334,7 @@ DEPENDENCIES
|
|
334
334
|
open-weather-ruby-client (~> 0.3.0)
|
335
335
|
pdf-reader (~> 1.4)
|
336
336
|
pg (~> 1.5)
|
337
|
-
pgvector (~> 0.2)
|
337
|
+
pgvector (~> 0.2.1)
|
338
338
|
pinecone (~> 0.1.6)
|
339
339
|
pry-byebug (~> 3.10.0)
|
340
340
|
qdrant-ruby (~> 0.9.0)
|
@@ -353,4 +353,4 @@ DEPENDENCIES
|
|
353
353
|
yard
|
354
354
|
|
355
355
|
BUNDLED WITH
|
356
|
-
2.
|
356
|
+
2.3.22
|
@@ -22,7 +22,7 @@ module Langchain
|
|
22
22
|
leftover_tokens = token_limit(model_name) - text_token_length
|
23
23
|
|
24
24
|
# Raise an error even if whole prompt is equal to the model's token limit (leftover_tokens == 0)
|
25
|
-
if leftover_tokens <= 0
|
25
|
+
if leftover_tokens < 0
|
26
26
|
raise limit_exceeded_exception(token_limit(model_name), text_token_length)
|
27
27
|
end
|
28
28
|
|
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
|
|
8
8
|
# Gem requirements: gem "pgvector", "~> 0.2"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
|
-
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:,
|
11
|
+
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace_column: nil, namespace: nil)
|
12
12
|
#
|
13
13
|
|
14
14
|
# The operators supported by the PostgreSQL vector search adapter
|
15
15
|
OPERATORS = {
|
16
|
-
"cosine_distance" => "
|
17
|
-
"euclidean_distance" => "
|
16
|
+
"cosine_distance" => "cosine",
|
17
|
+
"euclidean_distance" => "euclidean"
|
18
18
|
}
|
19
19
|
DEFAULT_OPERATOR = "cosine_distance"
|
20
20
|
|
21
|
-
attr_reader :operator, :
|
21
|
+
attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
|
22
22
|
|
23
23
|
# @param url [String] The URL of the PostgreSQL database
|
24
24
|
# @param index_name [String] The name of the table to use for the index
|
25
25
|
# @param llm [Object] The LLM client to use
|
26
|
-
# @param
|
27
|
-
def initialize(url:, index_name:, llm:,
|
28
|
-
|
26
|
+
# @param namespace [String] The namespace to use for the index when inserting/querying
|
27
|
+
def initialize(url:, index_name:, llm:, namespace: nil)
|
28
|
+
depends_on "sequel"
|
29
|
+
require "sequel"
|
30
|
+
depends_on "pgvector"
|
29
31
|
require "pgvector"
|
30
32
|
|
31
|
-
@
|
32
|
-
registry = ::PG::BasicTypeRegistry.new.define_default_types
|
33
|
-
::Pgvector::PG.register_vector(registry)
|
34
|
-
@client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
|
33
|
+
@db = Sequel.connect(url)
|
35
34
|
|
36
|
-
@
|
37
|
-
|
35
|
+
@table_name = index_name
|
36
|
+
|
37
|
+
@namespace_column = "namespace"
|
38
|
+
@namespace = namespace
|
38
39
|
@operator = OPERATORS[DEFAULT_OPERATOR]
|
39
40
|
|
40
41
|
super(llm: llm)
|
41
42
|
end
|
42
43
|
|
44
|
+
def documents_model
|
45
|
+
Class.new(Sequel::Model(table_name.to_sym)) do
|
46
|
+
plugin :pgvector, :vectors
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
43
50
|
# Upsert a list of texts to the index
|
44
51
|
# @param texts [Array<String>] The texts to add to the index
|
45
52
|
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
|
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
|
|
47
54
|
# the added or updated texts.
|
48
55
|
def upsert_texts(texts:, ids:)
|
49
56
|
data = texts.zip(ids).flat_map do |(text, id)|
|
50
|
-
|
57
|
+
{id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
|
51
58
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
# @db[table_name.to_sym].multi_insert(data, return: :primary_key)
|
60
|
+
@db[table_name.to_sym]
|
61
|
+
.insert_conflict(
|
62
|
+
target: :id,
|
63
|
+
update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
|
64
|
+
)
|
65
|
+
.multi_insert(data, return: :primary_key)
|
59
66
|
end
|
60
67
|
|
61
68
|
# Add a list of texts to the index
|
62
69
|
# @param texts [Array<String>] The texts to add to the index
|
63
70
|
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
64
|
-
# @return [
|
65
|
-
# the added texts.
|
71
|
+
# @return [Array<Integer>] The the ids of the added texts.
|
66
72
|
def add_texts(texts:, ids: nil)
|
67
73
|
if ids.nil? || ids.empty?
|
68
|
-
data = texts.
|
69
|
-
|
74
|
+
data = texts.map do |text|
|
75
|
+
{content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
|
70
76
|
end
|
71
|
-
|
72
|
-
|
73
|
-
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
|
74
|
-
data
|
75
|
-
)
|
77
|
+
|
78
|
+
@db[table_name.to_sym].multi_insert(data, return: :primary_key)
|
76
79
|
else
|
77
80
|
upsert_texts(texts: texts, ids: ids)
|
78
81
|
end
|
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
|
|
81
84
|
# Update a list of ids and corresponding texts to the index
|
82
85
|
# @param texts [Array<String>] The texts to add to the index
|
83
86
|
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
84
|
-
# @return [
|
85
|
-
# the updated texts.
|
87
|
+
# @return [Array<Integer>] The ids of the updated texts.
|
86
88
|
def update_texts(texts:, ids:)
|
87
89
|
upsert_texts(texts: texts, ids: ids)
|
88
90
|
end
|
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
|
|
90
92
|
# Create default schema
|
91
93
|
# @return [PG::Result] The response from the database
|
92
94
|
def create_default_schema
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
)
|
95
|
+
db.run "CREATE EXTENSION IF NOT EXISTS vector"
|
96
|
+
namespace = namespace_column
|
97
|
+
vector_dimension = default_dimension
|
98
|
+
db.create_table? table_name.to_sym do
|
99
|
+
primary_key :id
|
100
|
+
text :content
|
101
|
+
column :vectors, "vector(#{vector_dimension})"
|
102
|
+
text namespace.to_sym, default: nil
|
103
|
+
end
|
103
104
|
end
|
104
105
|
|
105
106
|
# TODO: Add destroy_default_schema method
|
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
|
|
123
124
|
# @param k [Integer] The number of top results to return
|
124
125
|
# @return [Array<Hash>] The results of the search
|
125
126
|
def similarity_search_by_vector(embedding:, k: 4)
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
SQL
|
131
|
-
conn.exec_params(query, [embedding, k])
|
127
|
+
db.transaction do # BEGIN
|
128
|
+
documents_model
|
129
|
+
.nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
|
130
|
+
.where(namespace_column.to_sym => namespace)
|
132
131
|
end
|
133
|
-
|
134
|
-
result.to_a
|
135
132
|
end
|
136
133
|
|
137
134
|
# Ask a question and return the answer
|
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
|
|
142
139
|
search_results = similarity_search(query: question)
|
143
140
|
|
144
141
|
context = search_results.map do |result|
|
145
|
-
result
|
142
|
+
result.content.to_s
|
146
143
|
end
|
147
144
|
context = context.join("\n---\n")
|
148
145
|
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.8
|
4
|
+
version: 0.6.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -324,14 +324,14 @@ dependencies:
|
|
324
324
|
requirements:
|
325
325
|
- - "~>"
|
326
326
|
- !ruby/object:Gem::Version
|
327
|
-
version:
|
327
|
+
version: 0.2.1
|
328
328
|
type: :development
|
329
329
|
prerelease: false
|
330
330
|
version_requirements: !ruby/object:Gem::Requirement
|
331
331
|
requirements:
|
332
332
|
- - "~>"
|
333
333
|
- !ruby/object:Gem::Version
|
334
|
-
version:
|
334
|
+
version: 0.2.1
|
335
335
|
- !ruby/object:Gem::Dependency
|
336
336
|
name: pdf-reader
|
337
337
|
requirement: !ruby/object:Gem::Requirement
|