langchainrb 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8446cff4207c4784ce29233712430d50c2cbaa551cfe8ad27f413f27ad8346b
4
- data.tar.gz: 8c7105302b673479be9a840ed1a19c815cf5a0415195db5373ce739c472dc2f3
3
+ metadata.gz: ba1fb0e3fbc05e4279fe3a698ad8fb1a25e02788991a8e6a7b27b411771096f3
4
+ data.tar.gz: 309cda1c8c7a4982b22c6ad2f82c20fb12ca3bdfdc3e8c0ebeaa9687a7f71ce0
5
5
  SHA512:
6
- metadata.gz: d7bd52c8d161a2d26a1e717711053267ed3b78def7efa2e9411017d324e278baceb1d88eb709cb701986201b785e1ad8ee455948b1bba59d6cb478226b1b5df4
7
- data.tar.gz: eeb639084a8990e1d36d035e6cd6d725377cecc61b3116f70254787f5d591a387c6f53e308dd482d2a96257f231866cffdf6326c516dcf78e60017817d8ebb93
6
+ metadata.gz: 3457cbad7efbc5504f4cb3b684e3837984be5d486c1ee21a718508d606dd63ccc2223dffc55f3cc9c52f5fd0a533b364407bbb4c3208515e2bab8ca2af9ea60a
7
+ data.tar.gz: 1ff3cded239c286ee87d7a2f0a1cfa45c734b185e35b6e6fbc63fd2951f7c7397e8562fa145a16f545ef07b2e983f40715010e9c96653b5f2bf128325a5a7577
data/.env.example CHANGED
@@ -1,4 +1,5 @@
1
1
  AI21_API_KEY=
2
+ ANTHROPIC_API_KEY=
2
3
  CHROMA_URL=
3
4
  COHERE_API_KEY=
4
5
  GOOGLE_PALM_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.10] - 2023-08-01
4
+ - 🗣️ LLMs
5
+ - Introducing Anthropic support
6
+
7
+ ## [0.6.9] - 2023-07-29
8
+
9
+ ## [0.6.8] - 2023-07-21
10
+
3
11
  ## [0.6.7] - 2023-07-19
4
12
  - Support for OpenAI functions
5
13
  - Streaming vectorsearch ask() responses
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.8)
4
+ langchainrb (0.6.10)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -33,8 +33,11 @@ GEM
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  afm (0.2.2)
35
35
  ai21 (0.2.1)
36
+ anthropic (0.1.0)
37
+ faraday (>= 1)
38
+ faraday-multipart (>= 1)
36
39
  ast (2.4.2)
37
- baran (0.1.6)
40
+ baran (0.1.7)
38
41
  builder (3.2.4)
39
42
  byebug (11.1.3)
40
43
  childprocess (4.1.0)
@@ -189,7 +192,7 @@ GEM
189
192
  ruby-rc4
190
193
  ttfunk
191
194
  pg (1.5.3)
192
- pgvector (0.2.0)
195
+ pgvector (0.2.1)
193
196
  pinecone (0.1.71)
194
197
  dry-struct (~> 1.6.0)
195
198
  dry-validation (~> 1.10.0)
@@ -318,6 +321,7 @@ PLATFORMS
318
321
 
319
322
  DEPENDENCIES
320
323
  ai21 (~> 0.2.1)
324
+ anthropic (~> 0.1.0)
321
325
  chroma-db (~> 0.3.0)
322
326
  cohere-ruby (~> 0.9.5)
323
327
  docx (~> 0.8.0)
@@ -334,7 +338,7 @@ DEPENDENCIES
334
338
  open-weather-ruby-client (~> 0.3.0)
335
339
  pdf-reader (~> 1.4)
336
340
  pg (~> 1.5)
337
- pgvector (~> 0.2)
341
+ pgvector (~> 0.2.1)
338
342
  pinecone (~> 0.1.6)
339
343
  pry-byebug (~> 3.10.0)
340
344
  qdrant-ruby (~> 0.9.0)
@@ -353,4 +357,4 @@ DEPENDENCIES
353
357
  yard
354
358
 
355
359
  BUNDLED WITH
356
- 2.4.0
360
+ 2.3.22
data/README.md CHANGED
@@ -203,6 +203,12 @@ Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
203
203
  ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
204
204
  ```
205
205
 
206
+ #### Anthropic
207
+ Add `gem "anthropic", "~> 0.1.0"` to your Gemfile.
208
+ ```ruby
209
+ anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
210
+ ```
211
+
206
212
  ### Using Prompts 📋
207
213
 
208
214
  #### Prompt Templates
@@ -537,7 +543,7 @@ Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
537
543
 
538
544
  ## Contributing
539
545
 
540
- Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchain.
546
+ Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchainrb.
541
547
 
542
548
  ## License
543
549
 
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::LLM
4
+ #
5
+ # Wrapper around Anthropic APIs.
6
+ #
7
+ # Gem requirements:
8
+ # gem "anthropic", "~> 0.1.0"
9
+ #
10
+ # Usage:
11
+ # anthropic = Langchain::LLM::Anthropic.new(api_key:)
12
+ #
13
+ class Anthropic < Base
14
+ DEFAULTS = {
15
+ temperature: 0.0,
16
+ completion_model_name: "claude-2"
17
+ }.freeze
18
+
19
+ # TODO: Implement token length validator for Anthropic
20
+ # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
21
+
22
+ def initialize(api_key:, llm_options: {}, default_options: {})
23
+ depends_on "anthropic"
24
+ require "anthropic"
25
+
26
+ @client = ::Anthropic::Client.new(access_token: api_key, **llm_options)
27
+ @defaults = DEFAULTS.merge(default_options)
28
+ end
29
+
30
+ #
31
+ # Generate a completion for a given prompt
32
+ #
33
+ # @param prompt [String] The prompt to generate a completion for
34
+ # @param params [Hash] extra parameters passed to Anthropic::Client#complete
35
+ # @return [String] The completion
36
+ #
37
+ def complete(prompt:, **params)
38
+ parameters = compose_parameters @defaults[:completion_model_name], params
39
+
40
+ parameters[:prompt] = prompt
41
+
42
+ # TODO: Implement token length validator for Anthropic
43
+ # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
44
+
45
+ response = client.complete(parameters: parameters)
46
+ response.dig("completion")
47
+ end
48
+
49
+ private
50
+
51
+ def compose_parameters(model, params)
52
+ default_params = {model: model, temperature: @defaults[:temperature]}
53
+
54
+ default_params.merge(params)
55
+ end
56
+
57
+ # TODO: Implement token length validator for Anthropic
58
+ # def validate_max_tokens(messages, model)
59
+ # LENGTH_VALIDATOR.validate_max_tokens!(messages, model)
60
+ # end
61
+ end
62
+ end
@@ -22,7 +22,7 @@ module Langchain
22
22
  leftover_tokens = token_limit(model_name) - text_token_length
23
23
 
24
24
  # Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
25
- if leftover_tokens <= 0
25
+ if leftover_tokens < 0
26
26
  raise limit_exceeded_exception(token_limit(model_name), text_token_length)
27
27
  end
28
28
 
@@ -25,6 +25,7 @@ module Langchain
25
25
  "code-davinci-002" => 8001,
26
26
  "gpt-4" => 8192,
27
27
  "gpt-4-0314" => 8192,
28
+ "gpt-4-0613" => 8192,
28
29
  "gpt-4-32k" => 32768,
29
30
  "gpt-4-32k-0314" => 32768,
30
31
  "gpt-4-32k-0613" => 32768,
@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "milvus", "~> 0.9.0"
9
9
  #
10
10
  # Usage:
11
- # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
11
+ # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
12
12
  #
13
13
 
14
14
  def initialize(url:, index_name:, llm:, api_key: nil)
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pgvector", "~> 0.2"
9
9
  #
10
10
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
11
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
12
  #
13
13
 
14
14
  # The operators supported by the PostgreSQL vector search adapter
15
15
  OPERATORS = {
16
- "cosine_distance" => "<=>",
17
- "euclidean_distance" => "<->"
16
+ "cosine_distance" => "cosine",
17
+ "euclidean_distance" => "euclidean"
18
18
  }
19
19
  DEFAULT_OPERATOR = "cosine_distance"
20
20
 
21
- attr_reader :operator, :quoted_table_name
21
+ attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
22
22
 
23
23
  # @param url [String] The URL of the PostgreSQL database
24
24
  # @param index_name [String] The name of the table to use for the index
25
25
  # @param llm [Object] The LLM client to use
26
- # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
27
- def initialize(url:, index_name:, llm:, api_key: nil)
28
- require "pg"
26
+ # @param namespace [String] The namespace to use for the index when inserting/querying
27
+ def initialize(url:, index_name:, llm:, namespace: nil)
28
+ depends_on "sequel"
29
+ require "sequel"
30
+ depends_on "pgvector"
29
31
  require "pgvector"
30
32
 
31
- @client = ::PG.connect(url)
32
- registry = ::PG::BasicTypeRegistry.new.define_default_types
33
- ::Pgvector::PG.register_vector(registry)
34
- @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
33
+ @db = Sequel.connect(url)
35
34
 
36
- @index_name = index_name
37
- @quoted_table_name = @client.quote_ident(index_name)
35
+ @table_name = index_name
36
+
37
+ @namespace_column = "namespace"
38
+ @namespace = namespace
38
39
  @operator = OPERATORS[DEFAULT_OPERATOR]
39
40
 
40
41
  super(llm: llm)
41
42
  end
42
43
 
44
+ def documents_model
45
+ Class.new(Sequel::Model(table_name.to_sym)) do
46
+ plugin :pgvector, :vectors
47
+ end
48
+ end
49
+
43
50
  # Upsert a list of texts to the index
44
51
  # @param texts [Array<String>] The texts to add to the index
45
52
  # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
47
54
  # the added or updated texts.
48
55
  def upsert_texts(texts:, ids:)
49
56
  data = texts.zip(ids).flat_map do |(text, id)|
50
- [id, text, llm.embed(text: text)]
57
+ {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
51
58
  end
52
- values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
53
- # see https://github.com/pgvector/pgvector#storing
54
- client.exec_params(
55
- "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
56
- #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
57
- data
58
- )
59
+ # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
60
+ @db[table_name.to_sym]
61
+ .insert_conflict(
62
+ target: :id,
63
+ update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
64
+ )
65
+ .multi_insert(data, return: :primary_key)
59
66
  end
60
67
 
61
68
  # Add a list of texts to the index
62
69
  # @param texts [Array<String>] The texts to add to the index
63
70
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
64
- # @return [PG::Result] The response from the database including the ids of
65
- # the added texts.
71
+ # @return [Array<Integer>] The ids of the added texts.
66
72
  def add_texts(texts:, ids: nil)
67
73
  if ids.nil? || ids.empty?
68
- data = texts.flat_map do |text|
69
- [text, llm.embed(text: text)]
74
+ data = texts.map do |text|
75
+ {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
70
76
  end
71
- values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
72
- client.exec_params(
73
- "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
74
- data
75
- )
77
+
78
+ @db[table_name.to_sym].multi_insert(data, return: :primary_key)
76
79
  else
77
80
  upsert_texts(texts: texts, ids: ids)
78
81
  end
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
81
84
  # Update a list of ids and corresponding texts to the index
82
85
  # @param texts [Array<String>] The texts to add to the index
83
86
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
84
- # @return [PG::Result] The response from the database including the ids of
85
- # the updated texts.
87
+ # @return [Array<Integer>] The ids of the updated texts.
86
88
  def update_texts(texts:, ids:)
87
89
  upsert_texts(texts: texts, ids: ids)
88
90
  end
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
90
92
  # Create default schema
91
93
  # @return [PG::Result] The response from the database
92
94
  def create_default_schema
93
- client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
94
- client.exec(
95
- <<~SQL
96
- CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
97
- id serial PRIMARY KEY,
98
- content TEXT,
99
- vectors VECTOR(#{default_dimension})
100
- );
101
- SQL
102
- )
95
+ db.run "CREATE EXTENSION IF NOT EXISTS vector"
96
+ namespace = namespace_column
97
+ vector_dimension = default_dimension
98
+ db.create_table? table_name.to_sym do
99
+ primary_key :id
100
+ text :content
101
+ column :vectors, "vector(#{vector_dimension})"
102
+ text namespace.to_sym, default: nil
103
+ end
103
104
  end
104
105
 
105
106
  # TODO: Add destroy_default_schema method
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
123
124
  # @param k [Integer] The number of top results to return
124
125
  # @return [Array<Hash>] The results of the search
125
126
  def similarity_search_by_vector(embedding:, k: 4)
126
- result = client.transaction do |conn|
127
- conn.exec("SET LOCAL ivfflat.probes = 10;")
128
- query = <<~SQL
129
- SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
130
- SQL
131
- conn.exec_params(query, [embedding, k])
127
+ db.transaction do # BEGIN
128
+ documents_model
129
+ .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
130
+ .where(namespace_column.to_sym => namespace)
132
131
  end
133
-
134
- result.to_a
135
132
  end
136
133
 
137
134
  # Ask a question and return the answer
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
142
139
  search_results = similarity_search(query: question)
143
140
 
144
141
  context = search_results.map do |result|
145
- result["content"].to_s
142
+ result.content.to_s
146
143
  end
147
144
  context = context.join("\n---\n")
148
145
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.8"
4
+ VERSION = "0.6.10"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -131,6 +131,7 @@ module Langchain
131
131
 
132
132
  module LLM
133
133
  autoload :AI21, "langchain/llm/ai21"
134
+ autoload :Anthropic, "langchain/llm/anthropic"
134
135
  autoload :Base, "langchain/llm/base"
135
136
  autoload :Cohere, "langchain/llm/cohere"
136
137
  autoload :GooglePalm, "langchain/llm/google_palm"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
138
  version: 0.2.1
139
+ - !ruby/object:Gem::Dependency
140
+ name: anthropic
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.1.0
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.1.0
139
153
  - !ruby/object:Gem::Dependency
140
154
  name: chroma-db
141
155
  requirement: !ruby/object:Gem::Requirement
@@ -324,14 +338,14 @@ dependencies:
324
338
  requirements:
325
339
  - - "~>"
326
340
  - !ruby/object:Gem::Version
327
- version: '0.2'
341
+ version: 0.2.1
328
342
  type: :development
329
343
  prerelease: false
330
344
  version_requirements: !ruby/object:Gem::Requirement
331
345
  requirements:
332
346
  - - "~>"
333
347
  - !ruby/object:Gem::Version
334
- version: '0.2'
348
+ version: 0.2.1
335
349
  - !ruby/object:Gem::Dependency
336
350
  name: pdf-reader
337
351
  requirement: !ruby/object:Gem::Requirement
@@ -518,6 +532,7 @@ files:
518
532
  - lib/langchain/data.rb
519
533
  - lib/langchain/dependency_helper.rb
520
534
  - lib/langchain/llm/ai21.rb
535
+ - lib/langchain/llm/anthropic.rb
521
536
  - lib/langchain/llm/base.rb
522
537
  - lib/langchain/llm/cohere.rb
523
538
  - lib/langchain/llm/google_palm.rb