langchainrb 0.6.8 → 0.6.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8446cff4207c4784ce29233712430d50c2cbaa551cfe8ad27f413f27ad8346b
4
- data.tar.gz: 8c7105302b673479be9a840ed1a19c815cf5a0415195db5373ce739c472dc2f3
3
+ metadata.gz: ba1fb0e3fbc05e4279fe3a698ad8fb1a25e02788991a8e6a7b27b411771096f3
4
+ data.tar.gz: 309cda1c8c7a4982b22c6ad2f82c20fb12ca3bdfdc3e8c0ebeaa9687a7f71ce0
5
5
  SHA512:
6
- metadata.gz: d7bd52c8d161a2d26a1e717711053267ed3b78def7efa2e9411017d324e278baceb1d88eb709cb701986201b785e1ad8ee455948b1bba59d6cb478226b1b5df4
7
- data.tar.gz: eeb639084a8990e1d36d035e6cd6d725377cecc61b3116f70254787f5d591a387c6f53e308dd482d2a96257f231866cffdf6326c516dcf78e60017817d8ebb93
6
+ metadata.gz: 3457cbad7efbc5504f4cb3b684e3837984be5d486c1ee21a718508d606dd63ccc2223dffc55f3cc9c52f5fd0a533b364407bbb4c3208515e2bab8ca2af9ea60a
7
+ data.tar.gz: 1ff3cded239c286ee87d7a2f0a1cfa45c734b185e35b6e6fbc63fd2951f7c7397e8562fa145a16f545ef07b2e983f40715010e9c96653b5f2bf128325a5a7577
data/.env.example CHANGED
@@ -1,4 +1,5 @@
1
1
  AI21_API_KEY=
2
+ ANTHROPIC_API_KEY=
2
3
  CHROMA_URL=
3
4
  COHERE_API_KEY=
4
5
  GOOGLE_PALM_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.10] - 2023-08-01
4
+ - 🗣️ LLMs
5
+ - Introducing Anthropic support
6
+
7
+ ## [0.6.9] - 2023-07-29
8
+
9
+ ## [0.6.8] - 2023-07-21
10
+
3
11
  ## [0.6.7] - 2023-07-19
4
12
  - Support for OpenAI functions
5
13
  - Streaming vectorsearch ask() responses
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.8)
4
+ langchainrb (0.6.10)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -33,8 +33,11 @@ GEM
33
33
  public_suffix (>= 2.0.2, < 6.0)
34
34
  afm (0.2.2)
35
35
  ai21 (0.2.1)
36
+ anthropic (0.1.0)
37
+ faraday (>= 1)
38
+ faraday-multipart (>= 1)
36
39
  ast (2.4.2)
37
- baran (0.1.6)
40
+ baran (0.1.7)
38
41
  builder (3.2.4)
39
42
  byebug (11.1.3)
40
43
  childprocess (4.1.0)
@@ -189,7 +192,7 @@ GEM
189
192
  ruby-rc4
190
193
  ttfunk
191
194
  pg (1.5.3)
192
- pgvector (0.2.0)
195
+ pgvector (0.2.1)
193
196
  pinecone (0.1.71)
194
197
  dry-struct (~> 1.6.0)
195
198
  dry-validation (~> 1.10.0)
@@ -318,6 +321,7 @@ PLATFORMS
318
321
 
319
322
  DEPENDENCIES
320
323
  ai21 (~> 0.2.1)
324
+ anthropic (~> 0.1.0)
321
325
  chroma-db (~> 0.3.0)
322
326
  cohere-ruby (~> 0.9.5)
323
327
  docx (~> 0.8.0)
@@ -334,7 +338,7 @@ DEPENDENCIES
334
338
  open-weather-ruby-client (~> 0.3.0)
335
339
  pdf-reader (~> 1.4)
336
340
  pg (~> 1.5)
337
- pgvector (~> 0.2)
341
+ pgvector (~> 0.2.1)
338
342
  pinecone (~> 0.1.6)
339
343
  pry-byebug (~> 3.10.0)
340
344
  qdrant-ruby (~> 0.9.0)
@@ -353,4 +357,4 @@ DEPENDENCIES
353
357
  yard
354
358
 
355
359
  BUNDLED WITH
356
- 2.4.0
360
+ 2.3.22
data/README.md CHANGED
@@ -203,6 +203,12 @@ Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
203
203
  ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
204
204
  ```
205
205
 
206
+ #### Anthropic
207
+ Add `gem "anthropic", "~> 0.1.0"` to your Gemfile.
208
+ ```ruby
209
+ anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
210
+ ```
211
+
206
212
  ### Using Prompts 📋
207
213
 
208
214
  #### Prompt Templates
@@ -537,7 +543,7 @@ Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
537
543
 
538
544
  ## Contributing
539
545
 
540
- Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchain.
546
+ Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchainrb.
541
547
 
542
548
  ## License
543
549
 
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::LLM
4
+ #
5
+ # Wrapper around Anthropic APIs.
6
+ #
7
+ # Gem requirements:
8
+ # gem "anthropic", "~> 0.1.0"
9
+ #
10
+ # Usage:
11
+ # anthropic = Langchain::LLM::Anthropic.new(api_key:)
12
+ #
13
+ class Anthropic < Base
14
+ DEFAULTS = {
15
+ temperature: 0.0,
16
+ completion_model_name: "claude-2"
17
+ }.freeze
18
+
19
+ # TODO: Implement token length validator for Anthropic
20
+ # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
21
+
22
+ def initialize(api_key:, llm_options: {}, default_options: {})
23
+ depends_on "anthropic"
24
+ require "anthropic"
25
+
26
+ @client = ::Anthropic::Client.new(access_token: api_key, **llm_options)
27
+ @defaults = DEFAULTS.merge(default_options)
28
+ end
29
+
30
+ #
31
+ # Generate a completion for a given prompt
32
+ #
33
+ # @param prompt [String] The prompt to generate a completion for
34
+ # @param params [Hash] extra parameters passed to Anthropic::Client#complete
35
+ # @return [String] The completion
36
+ #
37
+ def complete(prompt:, **params)
38
+ parameters = compose_parameters @defaults[:completion_model_name], params
39
+
40
+ parameters[:prompt] = prompt
41
+
42
+ # TODO: Implement token length validator for Anthropic
43
+ # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
44
+
45
+ response = client.complete(parameters: parameters)
46
+ response.dig("completion")
47
+ end
48
+
49
+ private
50
+
51
+ def compose_parameters(model, params)
52
+ default_params = {model: model, temperature: @defaults[:temperature]}
53
+
54
+ default_params.merge(params)
55
+ end
56
+
57
+ # TODO: Implement token length validator for Anthropic
58
+ # def validate_max_tokens(messages, model)
59
+ # LENGTH_VALIDATOR.validate_max_tokens!(messages, model)
60
+ # end
61
+ end
62
+ end
@@ -22,7 +22,7 @@ module Langchain
22
22
  leftover_tokens = token_limit(model_name) - text_token_length
23
23
 
24
24
  # Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
25
- if leftover_tokens <= 0
25
+ if leftover_tokens < 0
26
26
  raise limit_exceeded_exception(token_limit(model_name), text_token_length)
27
27
  end
28
28
 
@@ -25,6 +25,7 @@ module Langchain
25
25
  "code-davinci-002" => 8001,
26
26
  "gpt-4" => 8192,
27
27
  "gpt-4-0314" => 8192,
28
+ "gpt-4-0613" => 8192,
28
29
  "gpt-4-32k" => 32768,
29
30
  "gpt-4-32k-0314" => 32768,
30
31
  "gpt-4-32k-0613" => 32768,
@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "milvus", "~> 0.9.0"
9
9
  #
10
10
  # Usage:
11
- # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
11
+ # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
12
12
  #
13
13
 
14
14
  def initialize(url:, index_name:, llm:, api_key: nil)
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pgvector", "~> 0.2"
9
9
  #
10
10
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
11
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
12
  #
13
13
 
14
14
  # The operators supported by the PostgreSQL vector search adapter
15
15
  OPERATORS = {
16
- "cosine_distance" => "<=>",
17
- "euclidean_distance" => "<->"
16
+ "cosine_distance" => "cosine",
17
+ "euclidean_distance" => "euclidean"
18
18
  }
19
19
  DEFAULT_OPERATOR = "cosine_distance"
20
20
 
21
- attr_reader :operator, :quoted_table_name
21
+ attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
22
22
 
23
23
  # @param url [String] The URL of the PostgreSQL database
24
24
  # @param index_name [String] The name of the table to use for the index
25
25
  # @param llm [Object] The LLM client to use
26
- # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
27
- def initialize(url:, index_name:, llm:, api_key: nil)
28
- require "pg"
26
+ # @param namespace [String] The namespace to use for the index when inserting/querying
27
+ def initialize(url:, index_name:, llm:, namespace: nil)
28
+ depends_on "sequel"
29
+ require "sequel"
30
+ depends_on "pgvector"
29
31
  require "pgvector"
30
32
 
31
- @client = ::PG.connect(url)
32
- registry = ::PG::BasicTypeRegistry.new.define_default_types
33
- ::Pgvector::PG.register_vector(registry)
34
- @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
33
+ @db = Sequel.connect(url)
35
34
 
36
- @index_name = index_name
37
- @quoted_table_name = @client.quote_ident(index_name)
35
+ @table_name = index_name
36
+
37
+ @namespace_column = "namespace"
38
+ @namespace = namespace
38
39
  @operator = OPERATORS[DEFAULT_OPERATOR]
39
40
 
40
41
  super(llm: llm)
41
42
  end
42
43
 
44
+ def documents_model
45
+ Class.new(Sequel::Model(table_name.to_sym)) do
46
+ plugin :pgvector, :vectors
47
+ end
48
+ end
49
+
43
50
  # Upsert a list of texts to the index
44
51
  # @param texts [Array<String>] The texts to add to the index
45
52
  # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
47
54
  # the added or updated texts.
48
55
  def upsert_texts(texts:, ids:)
49
56
  data = texts.zip(ids).flat_map do |(text, id)|
50
- [id, text, llm.embed(text: text)]
57
+ {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
51
58
  end
52
- values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
53
- # see https://github.com/pgvector/pgvector#storing
54
- client.exec_params(
55
- "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
56
- #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
57
- data
58
- )
59
+ # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
60
+ @db[table_name.to_sym]
61
+ .insert_conflict(
62
+ target: :id,
63
+ update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
64
+ )
65
+ .multi_insert(data, return: :primary_key)
59
66
  end
60
67
 
61
68
  # Add a list of texts to the index
62
69
  # @param texts [Array<String>] The texts to add to the index
63
70
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
64
- # @return [PG::Result] The response from the database including the ids of
65
- # the added texts.
71
+ # @return [Array<Integer>] The ids of the added texts.
66
72
  def add_texts(texts:, ids: nil)
67
73
  if ids.nil? || ids.empty?
68
- data = texts.flat_map do |text|
69
- [text, llm.embed(text: text)]
74
+ data = texts.map do |text|
75
+ {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
70
76
  end
71
- values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
72
- client.exec_params(
73
- "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
74
- data
75
- )
77
+
78
+ @db[table_name.to_sym].multi_insert(data, return: :primary_key)
76
79
  else
77
80
  upsert_texts(texts: texts, ids: ids)
78
81
  end
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
81
84
  # Update a list of ids and corresponding texts to the index
82
85
  # @param texts [Array<String>] The texts to add to the index
83
86
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
84
- # @return [PG::Result] The response from the database including the ids of
85
- # the updated texts.
87
+ # @return [Array<Integer>] The ids of the updated texts.
86
88
  def update_texts(texts:, ids:)
87
89
  upsert_texts(texts: texts, ids: ids)
88
90
  end
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
90
92
  # Create default schema
91
93
  # @return [PG::Result] The response from the database
92
94
  def create_default_schema
93
- client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
94
- client.exec(
95
- <<~SQL
96
- CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
97
- id serial PRIMARY KEY,
98
- content TEXT,
99
- vectors VECTOR(#{default_dimension})
100
- );
101
- SQL
102
- )
95
+ db.run "CREATE EXTENSION IF NOT EXISTS vector"
96
+ namespace = namespace_column
97
+ vector_dimension = default_dimension
98
+ db.create_table? table_name.to_sym do
99
+ primary_key :id
100
+ text :content
101
+ column :vectors, "vector(#{vector_dimension})"
102
+ text namespace.to_sym, default: nil
103
+ end
103
104
  end
104
105
 
105
106
  # TODO: Add destroy_default_schema method
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
123
124
  # @param k [Integer] The number of top results to return
124
125
  # @return [Array<Hash>] The results of the search
125
126
  def similarity_search_by_vector(embedding:, k: 4)
126
- result = client.transaction do |conn|
127
- conn.exec("SET LOCAL ivfflat.probes = 10;")
128
- query = <<~SQL
129
- SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
130
- SQL
131
- conn.exec_params(query, [embedding, k])
127
+ db.transaction do # BEGIN
128
+ documents_model
129
+ .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
130
+ .where(namespace_column.to_sym => namespace)
132
131
  end
133
-
134
- result.to_a
135
132
  end
136
133
 
137
134
  # Ask a question and return the answer
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
142
139
  search_results = similarity_search(query: question)
143
140
 
144
141
  context = search_results.map do |result|
145
- result["content"].to_s
142
+ result.content.to_s
146
143
  end
147
144
  context = context.join("\n---\n")
148
145
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.8"
4
+ VERSION = "0.6.10"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -131,6 +131,7 @@ module Langchain
131
131
 
132
132
  module LLM
133
133
  autoload :AI21, "langchain/llm/ai21"
134
+ autoload :Anthropic, "langchain/llm/anthropic"
134
135
  autoload :Base, "langchain/llm/base"
135
136
  autoload :Cohere, "langchain/llm/cohere"
136
137
  autoload :GooglePalm, "langchain/llm/google_palm"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-08-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -136,6 +136,20 @@ dependencies:
136
136
  - - "~>"
137
137
  - !ruby/object:Gem::Version
138
138
  version: 0.2.1
139
+ - !ruby/object:Gem::Dependency
140
+ name: anthropic
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.1.0
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.1.0
139
153
  - !ruby/object:Gem::Dependency
140
154
  name: chroma-db
141
155
  requirement: !ruby/object:Gem::Requirement
@@ -324,14 +338,14 @@ dependencies:
324
338
  requirements:
325
339
  - - "~>"
326
340
  - !ruby/object:Gem::Version
327
- version: '0.2'
341
+ version: 0.2.1
328
342
  type: :development
329
343
  prerelease: false
330
344
  version_requirements: !ruby/object:Gem::Requirement
331
345
  requirements:
332
346
  - - "~>"
333
347
  - !ruby/object:Gem::Version
334
- version: '0.2'
348
+ version: 0.2.1
335
349
  - !ruby/object:Gem::Dependency
336
350
  name: pdf-reader
337
351
  requirement: !ruby/object:Gem::Requirement
@@ -518,6 +532,7 @@ files:
518
532
  - lib/langchain/data.rb
519
533
  - lib/langchain/dependency_helper.rb
520
534
  - lib/langchain/llm/ai21.rb
535
+ - lib/langchain/llm/anthropic.rb
521
536
  - lib/langchain/llm/base.rb
522
537
  - lib/langchain/llm/cohere.rb
523
538
  - lib/langchain/llm/google_palm.rb