langchainrb 0.6.8 → 0.6.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +9 -5
- data/README.md +7 -1
- data/lib/langchain/llm/anthropic.rb +62 -0
- data/lib/langchain/utils/token_length/base_validator.rb +1 -1
- data/lib/langchain/utils/token_length/openai_validator.rb +1 -0
- data/lib/langchain/vectorsearch/milvus.rb +1 -1
- data/lib/langchain/vectorsearch/pgvector.rb +48 -51
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -0
- metadata +19 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ba1fb0e3fbc05e4279fe3a698ad8fb1a25e02788991a8e6a7b27b411771096f3
|
4
|
+
data.tar.gz: 309cda1c8c7a4982b22c6ad2f82c20fb12ca3bdfdc3e8c0ebeaa9687a7f71ce0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3457cbad7efbc5504f4cb3b684e3837984be5d486c1ee21a718508d606dd63ccc2223dffc55f3cc9c52f5fd0a533b364407bbb4c3208515e2bab8ca2af9ea60a
|
7
|
+
data.tar.gz: 1ff3cded239c286ee87d7a2f0a1cfa45c734b185e35b6e6fbc63fd2951f7c7397e8562fa145a16f545ef07b2e983f40715010e9c96653b5f2bf128325a5a7577
|
data/.env.example
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.6.
|
4
|
+
langchainrb (0.6.10)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
7
|
json-schema (~> 4.0.0)
|
@@ -33,8 +33,11 @@ GEM
|
|
33
33
|
public_suffix (>= 2.0.2, < 6.0)
|
34
34
|
afm (0.2.2)
|
35
35
|
ai21 (0.2.1)
|
36
|
+
anthropic (0.1.0)
|
37
|
+
faraday (>= 1)
|
38
|
+
faraday-multipart (>= 1)
|
36
39
|
ast (2.4.2)
|
37
|
-
baran (0.1.
|
40
|
+
baran (0.1.7)
|
38
41
|
builder (3.2.4)
|
39
42
|
byebug (11.1.3)
|
40
43
|
childprocess (4.1.0)
|
@@ -189,7 +192,7 @@ GEM
|
|
189
192
|
ruby-rc4
|
190
193
|
ttfunk
|
191
194
|
pg (1.5.3)
|
192
|
-
pgvector (0.2.
|
195
|
+
pgvector (0.2.1)
|
193
196
|
pinecone (0.1.71)
|
194
197
|
dry-struct (~> 1.6.0)
|
195
198
|
dry-validation (~> 1.10.0)
|
@@ -318,6 +321,7 @@ PLATFORMS
|
|
318
321
|
|
319
322
|
DEPENDENCIES
|
320
323
|
ai21 (~> 0.2.1)
|
324
|
+
anthropic (~> 0.1.0)
|
321
325
|
chroma-db (~> 0.3.0)
|
322
326
|
cohere-ruby (~> 0.9.5)
|
323
327
|
docx (~> 0.8.0)
|
@@ -334,7 +338,7 @@ DEPENDENCIES
|
|
334
338
|
open-weather-ruby-client (~> 0.3.0)
|
335
339
|
pdf-reader (~> 1.4)
|
336
340
|
pg (~> 1.5)
|
337
|
-
pgvector (~> 0.2)
|
341
|
+
pgvector (~> 0.2.1)
|
338
342
|
pinecone (~> 0.1.6)
|
339
343
|
pry-byebug (~> 3.10.0)
|
340
344
|
qdrant-ruby (~> 0.9.0)
|
@@ -353,4 +357,4 @@ DEPENDENCIES
|
|
353
357
|
yard
|
354
358
|
|
355
359
|
BUNDLED WITH
|
356
|
-
2.
|
360
|
+
2.3.22
|
data/README.md
CHANGED
@@ -203,6 +203,12 @@ Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
|
|
203
203
|
ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
|
204
204
|
```
|
205
205
|
|
206
|
+
#### Anthropic
|
207
|
+
Add `gem "anthropic", "~> 0.1.0"` to your Gemfile.
|
208
|
+
```ruby
|
209
|
+
anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
|
210
|
+
```
|
211
|
+
|
206
212
|
### Using Prompts 📋
|
207
213
|
|
208
214
|
#### Prompt Templates
|
@@ -537,7 +543,7 @@ Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
|
|
537
543
|
|
538
544
|
## Contributing
|
539
545
|
|
540
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/
|
546
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchainrb.
|
541
547
|
|
542
548
|
## License
|
543
549
|
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::LLM
|
4
|
+
#
|
5
|
+
# Wrapper around Anthropic APIs.
|
6
|
+
#
|
7
|
+
# Gem requirements:
|
8
|
+
# gem "anthropic", "~> 0.1.0"
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# anthropic = Langchain::LLM::Anthropic.new(api_key:)
|
12
|
+
#
|
13
|
+
class Anthropic < Base
|
14
|
+
DEFAULTS = {
|
15
|
+
temperature: 0.0,
|
16
|
+
completion_model_name: "claude-2"
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
# TODO: Implement token length validator for Anthropic
|
20
|
+
# LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
|
21
|
+
|
22
|
+
def initialize(api_key:, llm_options: {}, default_options: {})
|
23
|
+
depends_on "anthropic"
|
24
|
+
require "anthropic"
|
25
|
+
|
26
|
+
@client = ::Anthropic::Client.new(access_token: api_key, **llm_options)
|
27
|
+
@defaults = DEFAULTS.merge(default_options)
|
28
|
+
end
|
29
|
+
|
30
|
+
#
|
31
|
+
# Generate a completion for a given prompt
|
32
|
+
#
|
33
|
+
# @param prompt [String] The prompt to generate a completion for
|
34
|
+
# @param params [Hash] extra parameters passed to Anthropic::Client#complete
|
35
|
+
# @return [String] The completion
|
36
|
+
#
|
37
|
+
def complete(prompt:, **params)
|
38
|
+
parameters = compose_parameters @defaults[:completion_model_name], params
|
39
|
+
|
40
|
+
parameters[:prompt] = prompt
|
41
|
+
|
42
|
+
# TODO: Implement token length validator for Anthropic
|
43
|
+
# parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
|
44
|
+
|
45
|
+
response = client.complete(parameters: parameters)
|
46
|
+
response.dig("completion")
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def compose_parameters(model, params)
|
52
|
+
default_params = {model: model, temperature: @defaults[:temperature]}
|
53
|
+
|
54
|
+
default_params.merge(params)
|
55
|
+
end
|
56
|
+
|
57
|
+
# TODO: Implement token length validator for Anthropic
|
58
|
+
# def validate_max_tokens(messages, model)
|
59
|
+
# LENGTH_VALIDATOR.validate_max_tokens!(messages, model)
|
60
|
+
# end
|
61
|
+
end
|
62
|
+
end
|
@@ -22,7 +22,7 @@ module Langchain
|
|
22
22
|
leftover_tokens = token_limit(model_name) - text_token_length
|
23
23
|
|
24
24
|
# Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
|
25
|
-
if leftover_tokens
|
25
|
+
if leftover_tokens < 0
|
26
26
|
raise limit_exceeded_exception(token_limit(model_name), text_token_length)
|
27
27
|
end
|
28
28
|
|
@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
|
|
8
8
|
# Gem requirements: gem "milvus", "~> 0.9.0"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
|
-
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:,
|
11
|
+
# milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
|
12
12
|
#
|
13
13
|
|
14
14
|
def initialize(url:, index_name:, llm:, api_key: nil)
|
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
|
|
8
8
|
# Gem requirements: gem "pgvector", "~> 0.2"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
|
-
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:,
|
11
|
+
# pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
|
12
12
|
#
|
13
13
|
|
14
14
|
# The operators supported by the PostgreSQL vector search adapter
|
15
15
|
OPERATORS = {
|
16
|
-
"cosine_distance" => "
|
17
|
-
"euclidean_distance" => "
|
16
|
+
"cosine_distance" => "cosine",
|
17
|
+
"euclidean_distance" => "euclidean"
|
18
18
|
}
|
19
19
|
DEFAULT_OPERATOR = "cosine_distance"
|
20
20
|
|
21
|
-
attr_reader :operator, :
|
21
|
+
attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
|
22
22
|
|
23
23
|
# @param url [String] The URL of the PostgreSQL database
|
24
24
|
# @param index_name [String] The name of the table to use for the index
|
25
25
|
# @param llm [Object] The LLM client to use
|
26
|
-
# @param
|
27
|
-
def initialize(url:, index_name:, llm:,
|
28
|
-
|
26
|
+
# @param namespace [String] The namespace to use for the index when inserting/querying
|
27
|
+
def initialize(url:, index_name:, llm:, namespace: nil)
|
28
|
+
depends_on "sequel"
|
29
|
+
require "sequel"
|
30
|
+
depends_on "pgvector"
|
29
31
|
require "pgvector"
|
30
32
|
|
31
|
-
@
|
32
|
-
registry = ::PG::BasicTypeRegistry.new.define_default_types
|
33
|
-
::Pgvector::PG.register_vector(registry)
|
34
|
-
@client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
|
33
|
+
@db = Sequel.connect(url)
|
35
34
|
|
36
|
-
@
|
37
|
-
|
35
|
+
@table_name = index_name
|
36
|
+
|
37
|
+
@namespace_column = "namespace"
|
38
|
+
@namespace = namespace
|
38
39
|
@operator = OPERATORS[DEFAULT_OPERATOR]
|
39
40
|
|
40
41
|
super(llm: llm)
|
41
42
|
end
|
42
43
|
|
44
|
+
def documents_model
|
45
|
+
Class.new(Sequel::Model(table_name.to_sym)) do
|
46
|
+
plugin :pgvector, :vectors
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
43
50
|
# Upsert a list of texts to the index
|
44
51
|
# @param texts [Array<String>] The texts to add to the index
|
45
52
|
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
|
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
|
|
47
54
|
# the added or updated texts.
|
48
55
|
def upsert_texts(texts:, ids:)
|
49
56
|
data = texts.zip(ids).flat_map do |(text, id)|
|
50
|
-
|
57
|
+
{id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
|
51
58
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
+
# @db[table_name.to_sym].multi_insert(data, return: :primary_key)
|
60
|
+
@db[table_name.to_sym]
|
61
|
+
.insert_conflict(
|
62
|
+
target: :id,
|
63
|
+
update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
|
64
|
+
)
|
65
|
+
.multi_insert(data, return: :primary_key)
|
59
66
|
end
|
60
67
|
|
61
68
|
# Add a list of texts to the index
|
62
69
|
# @param texts [Array<String>] The texts to add to the index
|
63
70
|
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
64
|
-
# @return [
|
65
|
-
# the added texts.
|
71
|
+
# @return [Array<Integer>] The ids of the added texts.
|
66
72
|
def add_texts(texts:, ids: nil)
|
67
73
|
if ids.nil? || ids.empty?
|
68
|
-
data = texts.
|
69
|
-
|
74
|
+
data = texts.map do |text|
|
75
|
+
{content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
|
70
76
|
end
|
71
|
-
|
72
|
-
|
73
|
-
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
|
74
|
-
data
|
75
|
-
)
|
77
|
+
|
78
|
+
@db[table_name.to_sym].multi_insert(data, return: :primary_key)
|
76
79
|
else
|
77
80
|
upsert_texts(texts: texts, ids: ids)
|
78
81
|
end
|
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
|
|
81
84
|
# Update a list of ids and corresponding texts to the index
|
82
85
|
# @param texts [Array<String>] The texts to add to the index
|
83
86
|
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
84
|
-
# @return [
|
85
|
-
# the updated texts.
|
87
|
+
# @return [Array<Integer>] The ids of the updated texts.
|
86
88
|
def update_texts(texts:, ids:)
|
87
89
|
upsert_texts(texts: texts, ids: ids)
|
88
90
|
end
|
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
|
|
90
92
|
# Create default schema
|
91
93
|
# @return [PG::Result] The response from the database
|
92
94
|
def create_default_schema
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
)
|
95
|
+
db.run "CREATE EXTENSION IF NOT EXISTS vector"
|
96
|
+
namespace = namespace_column
|
97
|
+
vector_dimension = default_dimension
|
98
|
+
db.create_table? table_name.to_sym do
|
99
|
+
primary_key :id
|
100
|
+
text :content
|
101
|
+
column :vectors, "vector(#{vector_dimension})"
|
102
|
+
text namespace.to_sym, default: nil
|
103
|
+
end
|
103
104
|
end
|
104
105
|
|
105
106
|
# TODO: Add destroy_default_schema method
|
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
|
|
123
124
|
# @param k [Integer] The number of top results to return
|
124
125
|
# @return [Array<Hash>] The results of the search
|
125
126
|
def similarity_search_by_vector(embedding:, k: 4)
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
SQL
|
131
|
-
conn.exec_params(query, [embedding, k])
|
127
|
+
db.transaction do # BEGIN
|
128
|
+
documents_model
|
129
|
+
.nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
|
130
|
+
.where(namespace_column.to_sym => namespace)
|
132
131
|
end
|
133
|
-
|
134
|
-
result.to_a
|
135
132
|
end
|
136
133
|
|
137
134
|
# Ask a question and return the answer
|
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
|
|
142
139
|
search_results = similarity_search(query: question)
|
143
140
|
|
144
141
|
context = search_results.map do |result|
|
145
|
-
result
|
142
|
+
result.content.to_s
|
146
143
|
end
|
147
144
|
context = context.join("\n---\n")
|
148
145
|
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -131,6 +131,7 @@ module Langchain
|
|
131
131
|
|
132
132
|
module LLM
|
133
133
|
autoload :AI21, "langchain/llm/ai21"
|
134
|
+
autoload :Anthropic, "langchain/llm/anthropic"
|
134
135
|
autoload :Base, "langchain/llm/base"
|
135
136
|
autoload :Cohere, "langchain/llm/cohere"
|
136
137
|
autoload :GooglePalm, "langchain/llm/google_palm"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - "~>"
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: 0.2.1
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: anthropic
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.1.0
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.1.0
|
139
153
|
- !ruby/object:Gem::Dependency
|
140
154
|
name: chroma-db
|
141
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -324,14 +338,14 @@ dependencies:
|
|
324
338
|
requirements:
|
325
339
|
- - "~>"
|
326
340
|
- !ruby/object:Gem::Version
|
327
|
-
version:
|
341
|
+
version: 0.2.1
|
328
342
|
type: :development
|
329
343
|
prerelease: false
|
330
344
|
version_requirements: !ruby/object:Gem::Requirement
|
331
345
|
requirements:
|
332
346
|
- - "~>"
|
333
347
|
- !ruby/object:Gem::Version
|
334
|
-
version:
|
348
|
+
version: 0.2.1
|
335
349
|
- !ruby/object:Gem::Dependency
|
336
350
|
name: pdf-reader
|
337
351
|
requirement: !ruby/object:Gem::Requirement
|
@@ -518,6 +532,7 @@ files:
|
|
518
532
|
- lib/langchain/data.rb
|
519
533
|
- lib/langchain/dependency_helper.rb
|
520
534
|
- lib/langchain/llm/ai21.rb
|
535
|
+
- lib/langchain/llm/anthropic.rb
|
521
536
|
- lib/langchain/llm/base.rb
|
522
537
|
- lib/langchain/llm/cohere.rb
|
523
538
|
- lib/langchain/llm/google_palm.rb
|