langchainrb 0.6.8 → 0.6.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8446cff4207c4784ce29233712430d50c2cbaa551cfe8ad27f413f27ad8346b
4
- data.tar.gz: 8c7105302b673479be9a840ed1a19c815cf5a0415195db5373ce739c472dc2f3
3
+ metadata.gz: 4d046a7cae545da3092694fee60b0fa3caa0852492c9bf7dbbe35001d96106e4
4
+ data.tar.gz: 8ed841d2359400f9a4700a49b82030fbe3dd3563545eca31b487914e8fb236f7
5
5
  SHA512:
6
- metadata.gz: d7bd52c8d161a2d26a1e717711053267ed3b78def7efa2e9411017d324e278baceb1d88eb709cb701986201b785e1ad8ee455948b1bba59d6cb478226b1b5df4
7
- data.tar.gz: eeb639084a8990e1d36d035e6cd6d725377cecc61b3116f70254787f5d591a387c6f53e308dd482d2a96257f231866cffdf6326c516dcf78e60017817d8ebb93
6
+ metadata.gz: eed2f03cbdd74ef42fafd07524dc6bf5c7ac2278e5e55dd1aad5b2cd87f319a91c8719a6e9ac43ac50cda5525f3defab14cfecc749bdec81075d52b3935b23dd
7
+ data.tar.gz: cd4fce88b0f9f545f6d32660879e6fa5044ec74b41dcb41bc29eafb78b307d8b2cb8204856098657d8c9707331c8629c8a707a227fe4d20a5816347f8a66991c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.9] - 2023-07-29
4
+
5
+ ## [0.6.8] - 2023-07-21
6
+
3
7
  ## [0.6.7] - 2023-07-19
4
8
  - Support for OpenAI functions
5
9
  - Streaming vectorsearch ask() responses
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.8)
4
+ langchainrb (0.6.9)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -34,7 +34,7 @@ GEM
34
34
  afm (0.2.2)
35
35
  ai21 (0.2.1)
36
36
  ast (2.4.2)
37
- baran (0.1.6)
37
+ baran (0.1.7)
38
38
  builder (3.2.4)
39
39
  byebug (11.1.3)
40
40
  childprocess (4.1.0)
@@ -189,7 +189,7 @@ GEM
189
189
  ruby-rc4
190
190
  ttfunk
191
191
  pg (1.5.3)
192
- pgvector (0.2.0)
192
+ pgvector (0.2.1)
193
193
  pinecone (0.1.71)
194
194
  dry-struct (~> 1.6.0)
195
195
  dry-validation (~> 1.10.0)
@@ -334,7 +334,7 @@ DEPENDENCIES
334
334
  open-weather-ruby-client (~> 0.3.0)
335
335
  pdf-reader (~> 1.4)
336
336
  pg (~> 1.5)
337
- pgvector (~> 0.2)
337
+ pgvector (~> 0.2.1)
338
338
  pinecone (~> 0.1.6)
339
339
  pry-byebug (~> 3.10.0)
340
340
  qdrant-ruby (~> 0.9.0)
@@ -353,4 +353,4 @@ DEPENDENCIES
353
353
  yard
354
354
 
355
355
  BUNDLED WITH
356
- 2.4.0
356
+ 2.3.22
@@ -22,7 +22,7 @@ module Langchain
22
22
  leftover_tokens = token_limit(model_name) - text_token_length
23
23
 
24
24
  # Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
25
- if leftover_tokens <= 0
25
+ if leftover_tokens < 0
26
26
  raise limit_exceeded_exception(token_limit(model_name), text_token_length)
27
27
  end
28
28
 
@@ -25,6 +25,7 @@ module Langchain
25
25
  "code-davinci-002" => 8001,
26
26
  "gpt-4" => 8192,
27
27
  "gpt-4-0314" => 8192,
28
+ "gpt-4-0613" => 8192,
28
29
  "gpt-4-32k" => 32768,
29
30
  "gpt-4-32k-0314" => 32768,
30
31
  "gpt-4-32k-0613" => 32768,
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pgvector", "~> 0.2"
9
9
  #
10
10
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
11
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
12
  #
13
13
 
14
14
  # The operators supported by the PostgreSQL vector search adapter
15
15
  OPERATORS = {
16
- "cosine_distance" => "<=>",
17
- "euclidean_distance" => "<->"
16
+ "cosine_distance" => "cosine",
17
+ "euclidean_distance" => "euclidean"
18
18
  }
19
19
  DEFAULT_OPERATOR = "cosine_distance"
20
20
 
21
- attr_reader :operator, :quoted_table_name
21
+ attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
22
22
 
23
23
  # @param url [String] The URL of the PostgreSQL database
24
24
  # @param index_name [String] The name of the table to use for the index
25
25
  # @param llm [Object] The LLM client to use
26
- # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
27
- def initialize(url:, index_name:, llm:, api_key: nil)
28
- require "pg"
26
+ # @param namespace [String] The namespace to use for the index when inserting/querying
27
+ def initialize(url:, index_name:, llm:, namespace: nil)
28
+ depends_on "sequel"
29
+ require "sequel"
30
+ depends_on "pgvector"
29
31
  require "pgvector"
30
32
 
31
- @client = ::PG.connect(url)
32
- registry = ::PG::BasicTypeRegistry.new.define_default_types
33
- ::Pgvector::PG.register_vector(registry)
34
- @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
33
+ @db = Sequel.connect(url)
35
34
 
36
- @index_name = index_name
37
- @quoted_table_name = @client.quote_ident(index_name)
35
+ @table_name = index_name
36
+
37
+ @namespace_column = "namespace"
38
+ @namespace = namespace
38
39
  @operator = OPERATORS[DEFAULT_OPERATOR]
39
40
 
40
41
  super(llm: llm)
41
42
  end
42
43
 
44
+ def documents_model
45
+ Class.new(Sequel::Model(table_name.to_sym)) do
46
+ plugin :pgvector, :vectors
47
+ end
48
+ end
49
+
43
50
  # Upsert a list of texts to the index
44
51
  # @param texts [Array<String>] The texts to add to the index
45
52
  # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
47
54
  # the added or updated texts.
48
55
  def upsert_texts(texts:, ids:)
49
56
  data = texts.zip(ids).flat_map do |(text, id)|
50
- [id, text, llm.embed(text: text)]
57
+ {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
51
58
  end
52
- values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
53
- # see https://github.com/pgvector/pgvector#storing
54
- client.exec_params(
55
- "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
56
- #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
57
- data
58
- )
59
+ # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
60
+ @db[table_name.to_sym]
61
+ .insert_conflict(
62
+ target: :id,
63
+ update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
64
+ )
65
+ .multi_insert(data, return: :primary_key)
59
66
  end
60
67
 
61
68
  # Add a list of texts to the index
62
69
  # @param texts [Array<String>] The texts to add to the index
63
70
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
64
- # @return [PG::Result] The response from the database including the ids of
65
- # the added texts.
71
+ # @return [Array<Integer>] The ids of the added texts.
66
72
  def add_texts(texts:, ids: nil)
67
73
  if ids.nil? || ids.empty?
68
- data = texts.flat_map do |text|
69
- [text, llm.embed(text: text)]
74
+ data = texts.map do |text|
75
+ {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
70
76
  end
71
- values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
72
- client.exec_params(
73
- "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
74
- data
75
- )
77
+
78
+ @db[table_name.to_sym].multi_insert(data, return: :primary_key)
76
79
  else
77
80
  upsert_texts(texts: texts, ids: ids)
78
81
  end
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
81
84
  # Update a list of ids and corresponding texts to the index
82
85
  # @param texts [Array<String>] The texts to add to the index
83
86
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
84
- # @return [PG::Result] The response from the database including the ids of
85
- # the updated texts.
87
+ # @return [Array<Integer>] The ids of the updated texts.
86
88
  def update_texts(texts:, ids:)
87
89
  upsert_texts(texts: texts, ids: ids)
88
90
  end
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
90
92
  # Create default schema
91
93
  # @return [PG::Result] The response from the database
92
94
  def create_default_schema
93
- client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
94
- client.exec(
95
- <<~SQL
96
- CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
97
- id serial PRIMARY KEY,
98
- content TEXT,
99
- vectors VECTOR(#{default_dimension})
100
- );
101
- SQL
102
- )
95
+ db.run "CREATE EXTENSION IF NOT EXISTS vector"
96
+ namespace = namespace_column
97
+ vector_dimension = default_dimension
98
+ db.create_table? table_name.to_sym do
99
+ primary_key :id
100
+ text :content
101
+ column :vectors, "vector(#{vector_dimension})"
102
+ text namespace.to_sym, default: nil
103
+ end
103
104
  end
104
105
 
105
106
  # TODO: Add destroy_default_schema method
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
123
124
  # @param k [Integer] The number of top results to return
124
125
  # @return [Array<Hash>] The results of the search
125
126
  def similarity_search_by_vector(embedding:, k: 4)
126
- result = client.transaction do |conn|
127
- conn.exec("SET LOCAL ivfflat.probes = 10;")
128
- query = <<~SQL
129
- SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
130
- SQL
131
- conn.exec_params(query, [embedding, k])
127
+ db.transaction do # BEGIN
128
+ documents_model
129
+ .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
130
+ .where(namespace_column.to_sym => namespace)
132
131
  end
133
-
134
- result.to_a
135
132
  end
136
133
 
137
134
  # Ask a question and return the answer
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
142
139
  search_results = similarity_search(query: question)
143
140
 
144
141
  context = search_results.map do |result|
145
- result["content"].to_s
142
+ result.content.to_s
146
143
  end
147
144
  context = context.join("\n---\n")
148
145
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.8"
4
+ VERSION = "0.6.9"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -324,14 +324,14 @@ dependencies:
324
324
  requirements:
325
325
  - - "~>"
326
326
  - !ruby/object:Gem::Version
327
- version: '0.2'
327
+ version: 0.2.1
328
328
  type: :development
329
329
  prerelease: false
330
330
  version_requirements: !ruby/object:Gem::Requirement
331
331
  requirements:
332
332
  - - "~>"
333
333
  - !ruby/object:Gem::Version
334
- version: '0.2'
334
+ version: 0.2.1
335
335
  - !ruby/object:Gem::Dependency
336
336
  name: pdf-reader
337
337
  requirement: !ruby/object:Gem::Requirement