langchainrb 0.6.8 → 0.6.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e8446cff4207c4784ce29233712430d50c2cbaa551cfe8ad27f413f27ad8346b
4
- data.tar.gz: 8c7105302b673479be9a840ed1a19c815cf5a0415195db5373ce739c472dc2f3
3
+ metadata.gz: 4d046a7cae545da3092694fee60b0fa3caa0852492c9bf7dbbe35001d96106e4
4
+ data.tar.gz: 8ed841d2359400f9a4700a49b82030fbe3dd3563545eca31b487914e8fb236f7
5
5
  SHA512:
6
- metadata.gz: d7bd52c8d161a2d26a1e717711053267ed3b78def7efa2e9411017d324e278baceb1d88eb709cb701986201b785e1ad8ee455948b1bba59d6cb478226b1b5df4
7
- data.tar.gz: eeb639084a8990e1d36d035e6cd6d725377cecc61b3116f70254787f5d591a387c6f53e308dd482d2a96257f231866cffdf6326c516dcf78e60017817d8ebb93
6
+ metadata.gz: eed2f03cbdd74ef42fafd07524dc6bf5c7ac2278e5e55dd1aad5b2cd87f319a91c8719a6e9ac43ac50cda5525f3defab14cfecc749bdec81075d52b3935b23dd
7
+ data.tar.gz: cd4fce88b0f9f545f6d32660879e6fa5044ec74b41dcb41bc29eafb78b307d8b2cb8204856098657d8c9707331c8629c8a707a227fe4d20a5816347f8a66991c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.9] - 2023-07-29
4
+
5
+ ## [0.6.8] - 2023-07-21
6
+
3
7
  ## [0.6.7] - 2023-07-19
4
8
  - Support for OpenAI functions
5
9
  - Streaming vectorsearch ask() responses
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.8)
4
+ langchainrb (0.6.9)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
@@ -34,7 +34,7 @@ GEM
34
34
  afm (0.2.2)
35
35
  ai21 (0.2.1)
36
36
  ast (2.4.2)
37
- baran (0.1.6)
37
+ baran (0.1.7)
38
38
  builder (3.2.4)
39
39
  byebug (11.1.3)
40
40
  childprocess (4.1.0)
@@ -189,7 +189,7 @@ GEM
189
189
  ruby-rc4
190
190
  ttfunk
191
191
  pg (1.5.3)
192
- pgvector (0.2.0)
192
+ pgvector (0.2.1)
193
193
  pinecone (0.1.71)
194
194
  dry-struct (~> 1.6.0)
195
195
  dry-validation (~> 1.10.0)
@@ -334,7 +334,7 @@ DEPENDENCIES
334
334
  open-weather-ruby-client (~> 0.3.0)
335
335
  pdf-reader (~> 1.4)
336
336
  pg (~> 1.5)
337
- pgvector (~> 0.2)
337
+ pgvector (~> 0.2.1)
338
338
  pinecone (~> 0.1.6)
339
339
  pry-byebug (~> 3.10.0)
340
340
  qdrant-ruby (~> 0.9.0)
@@ -353,4 +353,4 @@ DEPENDENCIES
353
353
  yard
354
354
 
355
355
  BUNDLED WITH
356
- 2.4.0
356
+ 2.3.22
@@ -22,7 +22,7 @@ module Langchain
22
22
  leftover_tokens = token_limit(model_name) - text_token_length
23
23
 
24
24
  # Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
25
- if leftover_tokens <= 0
25
+ if leftover_tokens < 0
26
26
  raise limit_exceeded_exception(token_limit(model_name), text_token_length)
27
27
  end
28
28
 
@@ -25,6 +25,7 @@ module Langchain
25
25
  "code-davinci-002" => 8001,
26
26
  "gpt-4" => 8192,
27
27
  "gpt-4-0314" => 8192,
28
+ "gpt-4-0613" => 8192,
28
29
  "gpt-4-32k" => 32768,
29
30
  "gpt-4-32k-0314" => 32768,
30
31
  "gpt-4-32k-0613" => 32768,
@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
8
8
  # Gem requirements: gem "pgvector", "~> 0.2"
9
9
  #
10
10
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
11
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
12
  #
13
13
 
14
14
  # The operators supported by the PostgreSQL vector search adapter
15
15
  OPERATORS = {
16
- "cosine_distance" => "<=>",
17
- "euclidean_distance" => "<->"
16
+ "cosine_distance" => "cosine",
17
+ "euclidean_distance" => "euclidean"
18
18
  }
19
19
  DEFAULT_OPERATOR = "cosine_distance"
20
20
 
21
- attr_reader :operator, :quoted_table_name
21
+ attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
22
22
 
23
23
  # @param url [String] The URL of the PostgreSQL database
24
24
  # @param index_name [String] The name of the table to use for the index
25
25
  # @param llm [Object] The LLM client to use
26
- # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
27
- def initialize(url:, index_name:, llm:, api_key: nil)
28
- require "pg"
26
+ # @param namespace [String] The namespace to use for the index when inserting/querying
27
+ def initialize(url:, index_name:, llm:, namespace: nil)
28
+ depends_on "sequel"
29
+ require "sequel"
30
+ depends_on "pgvector"
29
31
  require "pgvector"
30
32
 
31
- @client = ::PG.connect(url)
32
- registry = ::PG::BasicTypeRegistry.new.define_default_types
33
- ::Pgvector::PG.register_vector(registry)
34
- @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
33
+ @db = Sequel.connect(url)
35
34
 
36
- @index_name = index_name
37
- @quoted_table_name = @client.quote_ident(index_name)
35
+ @table_name = index_name
36
+
37
+ @namespace_column = "namespace"
38
+ @namespace = namespace
38
39
  @operator = OPERATORS[DEFAULT_OPERATOR]
39
40
 
40
41
  super(llm: llm)
41
42
  end
42
43
 
44
+ def documents_model
45
+ Class.new(Sequel::Model(table_name.to_sym)) do
46
+ plugin :pgvector, :vectors
47
+ end
48
+ end
49
+
43
50
  # Upsert a list of texts to the index
44
51
  # @param texts [Array<String>] The texts to add to the index
45
52
  # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
47
54
  # the added or updated texts.
48
55
  def upsert_texts(texts:, ids:)
49
56
  data = texts.zip(ids).flat_map do |(text, id)|
50
- [id, text, llm.embed(text: text)]
57
+ {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
51
58
  end
52
- values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
53
- # see https://github.com/pgvector/pgvector#storing
54
- client.exec_params(
55
- "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
56
- #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
57
- data
58
- )
59
+ # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
60
+ @db[table_name.to_sym]
61
+ .insert_conflict(
62
+ target: :id,
63
+ update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
64
+ )
65
+ .multi_insert(data, return: :primary_key)
59
66
  end
60
67
 
61
68
  # Add a list of texts to the index
62
69
  # @param texts [Array<String>] The texts to add to the index
63
70
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
64
- # @return [PG::Result] The response from the database including the ids of
65
- # the added texts.
71
+ # @return [Array<Integer>] The ids of the added texts.
66
72
  def add_texts(texts:, ids: nil)
67
73
  if ids.nil? || ids.empty?
68
- data = texts.flat_map do |text|
69
- [text, llm.embed(text: text)]
74
+ data = texts.map do |text|
75
+ {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
70
76
  end
71
- values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
72
- client.exec_params(
73
- "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
74
- data
75
- )
77
+
78
+ @db[table_name.to_sym].multi_insert(data, return: :primary_key)
76
79
  else
77
80
  upsert_texts(texts: texts, ids: ids)
78
81
  end
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
81
84
  # Update a list of ids and corresponding texts to the index
82
85
  # @param texts [Array<String>] The texts to add to the index
83
86
  # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
84
- # @return [PG::Result] The response from the database including the ids of
85
- # the updated texts.
87
+ # @return [Array<Integer>] The ids of the updated texts.
86
88
  def update_texts(texts:, ids:)
87
89
  upsert_texts(texts: texts, ids: ids)
88
90
  end
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
90
92
  # Create default schema
91
93
  # @return [PG::Result] The response from the database
92
94
  def create_default_schema
93
- client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
94
- client.exec(
95
- <<~SQL
96
- CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
97
- id serial PRIMARY KEY,
98
- content TEXT,
99
- vectors VECTOR(#{default_dimension})
100
- );
101
- SQL
102
- )
95
+ db.run "CREATE EXTENSION IF NOT EXISTS vector"
96
+ namespace = namespace_column
97
+ vector_dimension = default_dimension
98
+ db.create_table? table_name.to_sym do
99
+ primary_key :id
100
+ text :content
101
+ column :vectors, "vector(#{vector_dimension})"
102
+ text namespace.to_sym, default: nil
103
+ end
103
104
  end
104
105
 
105
106
  # TODO: Add destroy_default_schema method
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
123
124
  # @param k [Integer] The number of top results to return
124
125
  # @return [Array<Hash>] The results of the search
125
126
  def similarity_search_by_vector(embedding:, k: 4)
126
- result = client.transaction do |conn|
127
- conn.exec("SET LOCAL ivfflat.probes = 10;")
128
- query = <<~SQL
129
- SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
130
- SQL
131
- conn.exec_params(query, [embedding, k])
127
+ db.transaction do # BEGIN
128
+ documents_model
129
+ .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
130
+ .where(namespace_column.to_sym => namespace)
132
131
  end
133
-
134
- result.to_a
135
132
  end
136
133
 
137
134
  # Ask a question and return the answer
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
142
139
  search_results = similarity_search(query: question)
143
140
 
144
141
  context = search_results.map do |result|
145
- result["content"].to_s
142
+ result.content.to_s
146
143
  end
147
144
  context = context.join("\n---\n")
148
145
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.8"
4
+ VERSION = "0.6.9"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.8
4
+ version: 0.6.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-21 00:00:00.000000000 Z
11
+ date: 2023-07-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -324,14 +324,14 @@ dependencies:
324
324
  requirements:
325
325
  - - "~>"
326
326
  - !ruby/object:Gem::Version
327
- version: '0.2'
327
+ version: 0.2.1
328
328
  type: :development
329
329
  prerelease: false
330
330
  version_requirements: !ruby/object:Gem::Requirement
331
331
  requirements:
332
332
  - - "~>"
333
333
  - !ruby/object:Gem::Version
334
- version: '0.2'
334
+ version: 0.2.1
335
335
  - !ruby/object:Gem::Dependency
336
336
  name: pdf-reader
337
337
  requirement: !ruby/object:Gem::Requirement