langchainrb 0.6.18 → 0.6.19

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 437c6387ded139ed1a513414bfb7242cdbadf1ba6526c7a89346aa2fa9490fc2
4
- data.tar.gz: dd6f437a4bbc4807a16631dd790f66c9de4e9456011b2c4f84302fe3fab1377b
3
+ metadata.gz: d7be5e031274fba7a4c0d7fc2cd3f472ed83fb66d8c6b355fb71fbf69a825b73
4
+ data.tar.gz: 745cbc4f3d7b569d2e1407acc8be123f77a0aac2964840d7c3dca215592811ee
5
5
  SHA512:
6
- metadata.gz: 24748539de50dfa816fdb71173ef00a6b04f9737f32926fca919865a49b9812dd9f1fdb286c361c98e33cc994f67e8988ab688bfdf6bf3020d954eb0c791177c
7
- data.tar.gz: 283b10460187cada7485e08a19c89e7485925ab2f73a5ad51b06a72e8fd9ee1600ddac9d000f13c0c1af13f6defece9fdcc272489d0df803f94da96fe1c76cfd
6
+ metadata.gz: e1392abe2fc0c4928593bd77d0e62688e3959ec39fd3f7bb5effc784b47599402c611ecc545868178b5d04ec688d68d6406f220697e8bfe40771cc593292a192
7
+ data.tar.gz: 926bccf20c71af3d31d942cf439336df9edc489a8e5e0359a6c24bb26e5b818be048a7ef63ebcce721bb99392b49e407288ffdd7387dd33d3f0161e92ff6e045
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.19] - 2023-10-18
4
+ - Elasticsearch vector search support
5
+ - Fix `lib/langchain/railtie.rb` not being loaded with the gem
6
+
3
7
  ## [0.6.18] - 2023-10-16
4
8
  - Introduce `Langchain::LLM::Response` object
5
9
  - Introduce `Langchain::Chunk` object
data/README.md CHANGED
@@ -19,11 +19,11 @@ Langchain.rb is a library that's an abstraction layer on top many emergent AI, M
19
19
 
20
20
  Install the gem and add to the application's Gemfile by executing:
21
21
 
22
- $ bundle add langchainrb
22
+ bundle add langchainrb
23
23
 
24
24
  If bundler is not being used to manage dependencies, install the gem by executing:
25
25
 
26
- $ gem install langchainrb
26
+ gem install langchainrb
27
27
 
28
28
  ## Usage
29
29
 
@@ -37,7 +37,7 @@ require "langchain"
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
38
  | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
- | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
+ | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
41
41
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
43
43
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "forwardable"
4
-
5
3
  module Langchain::Vectorsearch
6
4
  # = Vector Databases
7
5
  # A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
@@ -9,11 +7,13 @@ module Langchain::Vectorsearch
9
7
  # == Available vector databases
10
8
  #
11
9
  # - {Langchain::Vectorsearch::Chroma}
10
+ # - {Langchain::Vectorsearch::Elasticsearch}
11
+ # - {Langchain::Vectorsearch::Hnswlib}
12
12
  # - {Langchain::Vectorsearch::Milvus}
13
+ # - {Langchain::Vectorsearch::Pgvector}
13
14
  # - {Langchain::Vectorsearch::Pinecone}
14
15
  # - {Langchain::Vectorsearch::Qdrant}
15
16
  # - {Langchain::Vectorsearch::Weaviate}
16
- # - {Langchain::Vectorsearch::Pgvector}
17
17
  #
18
18
  # == Usage
19
19
  #
@@ -150,9 +150,6 @@ module Langchain::Vectorsearch
150
150
  raise NotImplementedError, "#{self.class.name} does not support asking questions"
151
151
  end
152
152
 
153
- def_delegators :llm,
154
- :default_dimension
155
-
156
153
  # HyDE-style prompt
157
154
  #
158
155
  # @param [String] User's question
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Chroma DB
7
7
  #
8
- # Gem requirements: gem "chroma-db", "~> 0.6.0"
8
+ # Gem requirements:
9
+ # gem "chroma-db", "~> 0.6.0"
9
10
  #
10
11
  # Usage:
11
12
  # chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, llm_api_key:, api_key: nil)
@@ -29,14 +30,15 @@ module Langchain::Vectorsearch
29
30
 
30
31
  # Add a list of texts to the index
31
32
  # @param texts [Array<String>] The list of texts to add
33
+ # @param ids [Array<String>] The list of ids to use for the texts (optional)
34
+ # @param metadatas [Array<Hash>] The list of metadata to use for the texts (optional)
32
35
  # @return [Hash] The response from the server
33
- def add_texts(texts:, ids: [])
36
+ def add_texts(texts:, ids: [], metadatas: [])
34
37
  embeddings = Array(texts).map.with_index do |text, i|
35
38
  ::Chroma::Resources::Embedding.new(
36
39
  id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
37
40
  embedding: llm.embed(text: text).embedding,
38
- # TODO: Add support for passing metadata
39
- metadata: {}, # metadatas[index],
41
+ metadata: metadatas[i] || {},
40
42
  document: text # Do we actually need to store the whole original document?
41
43
  )
42
44
  end
@@ -45,13 +47,12 @@ module Langchain::Vectorsearch
45
47
  collection.add(embeddings)
46
48
  end
47
49
 
48
- def update_texts(texts:, ids:)
50
+ def update_texts(texts:, ids:, metadatas: [])
49
51
  embeddings = Array(texts).map.with_index do |text, i|
50
52
  ::Chroma::Resources::Embedding.new(
51
53
  id: ids[i].to_s,
52
54
  embedding: llm.embed(text: text).embedding,
53
- # TODO: Add support for passing metadata
54
- metadata: [], # metadatas[index],
55
+ metadata: metadatas[i] || {},
55
56
  document: text # Do we actually need to store the whole original document?
56
57
  )
57
58
  end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::Vectorsearch
4
+ class Elasticsearch < Base
5
+ #
6
+ # Wrapper around Elasticsearch vector search capabilities.
7
+ #
8
+ # Setting up Elasticsearch:
9
+ # 1. Get Elasticsearch up and running with Docker: https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
10
+ # 2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
11
+ # 3. Set the ELASTICSEARCH_URL environment variable
12
+ #
13
+ # Gem requirements:
14
+ # gem "elasticsearch", "~> 8.2.0"
15
+ #
16
+ # Usage:
17
+ # llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
18
+ # es = Langchain::Vectorsearch::Elasticsearch.new(
19
+ # url: ENV["ELASTICSEARCH_URL"],
20
+ # index_name: "docs",
21
+ # llm: llm,
22
+ # es_options: {
23
+ # transport_options: {ssl: {verify: false}},
24
+ # ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
25
+ # }
26
+ # )
27
+ #
28
+ # es.create_default_schema
29
+ # es.add_texts(texts: ["..."])
30
+ # es.similarity_search(text: "...")
31
+ #
32
+ attr_accessor :es_client, :index_name, :options
33
+
34
+ def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
35
+ require "elasticsearch"
36
+
37
+ @options = {
38
+ url: url,
39
+ request_timeout: 20,
40
+ log: false
41
+ }.merge(es_options)
42
+
43
+ @es_client = ::Elasticsearch::Client.new(**options)
44
+ @index_name = index_name
45
+
46
+ super(llm: llm)
47
+ end
48
+
49
+ def add_texts(texts: [])
50
+ body = texts.map do |text|
51
+ [
52
+ {index: {_index: index_name}},
53
+ {input: text, input_vector: llm.embed(text: text).embedding}
54
+ ]
55
+ end.flatten
56
+
57
+ es_client.bulk(body: body)
58
+ end
59
+
60
+ def update_texts(texts: [], ids: [])
61
+ body = texts.map.with_index do |text, i|
62
+ [
63
+ {index: {_index: index_name, _id: ids[i]}},
64
+ {input: text, input_vector: llm.embed(text: text).embedding}
65
+ ]
66
+ end.flatten
67
+
68
+ es_client.bulk(body: body)
69
+ end
70
+
71
+ def create_default_schema
72
+ es_client.indices.create(
73
+ index: index_name,
74
+ body: default_schema
75
+ )
76
+ end
77
+
78
+ def delete_default_schema
79
+ es_client.indices.delete(
80
+ index: index_name
81
+ )
82
+ end
83
+
84
+ def default_vector_settings
85
+ {type: "dense_vector", dims: llm.default_dimension}
86
+ end
87
+
88
+ def vector_settings
89
+ options[:vector_settings] || default_vector_settings
90
+ end
91
+
92
+ def default_schema
93
+ {
94
+ mappings: {
95
+ properties: {
96
+ input: {
97
+ type: "text"
98
+ },
99
+ input_vector: vector_settings
100
+ }
101
+ }
102
+ }
103
+ end
104
+
105
+ def default_query(query_vector)
106
+ {
107
+ script_score: {
108
+ query: {match_all: {}},
109
+ script: {
110
+ source: "cosineSimilarity(params.query_vector, 'input_vector') + 1.0",
111
+ params: {
112
+ query_vector: query_vector
113
+ }
114
+ }
115
+ }
116
+ }
117
+ end
118
+
119
+ # TODO: Implement this
120
+ # def ask()
121
+ # end
122
+
123
+ def similarity_search(text: "", k: 10, query: {})
124
+ if text.empty? && query.empty?
125
+ raise "Either text or query should pass as an argument"
126
+ end
127
+
128
+ if query.empty?
129
+ query_vector = llm.embed(text: text).embedding
130
+
131
+ query = default_query(query_vector)
132
+ end
133
+
134
+ es_client.search(body: {query: query, size: k}).body
135
+ end
136
+
137
+ def similarity_search_by_vector(embedding: [], k: 10, query: {})
138
+ if embedding.empty? && query.empty?
139
+ raise "Either embedding or query should pass as an argument"
140
+ end
141
+
142
+ query = default_query(embedding) if query.empty?
143
+
144
+ es_client.search(body: {query: query, size: k}).body
145
+ end
146
+ end
147
+ end
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Milvus REST APIs.
7
7
  #
8
- # Gem requirements: gem "milvus", "~> 0.9.2"
8
+ # Gem requirements:
9
+ # gem "milvus", "~> 0.9.2"
9
10
  #
10
11
  # Usage:
11
12
  # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
@@ -70,7 +71,7 @@ module Langchain::Vectorsearch
70
71
  type_params: [
71
72
  {
72
73
  key: "dim",
73
- value: default_dimension.to_s
74
+ value: llm.default_dimension.to_s
74
75
  }
75
76
  ]
76
77
  }
@@ -5,10 +5,12 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # The PostgreSQL vector search adapter
7
7
  #
8
- # Gem requirements: gem "pgvector", "~> 0.2"
8
+ # Gem requirements:
9
+ # gem "sequel", "~> 5.68.0"
10
+ # gem "pgvector", "~> 0.2"
9
11
  #
10
12
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
13
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
14
  #
13
15
 
14
16
  # The operators supported by the PostgreSQL vector search adapter
@@ -91,7 +93,7 @@ module Langchain::Vectorsearch
91
93
  def create_default_schema
92
94
  db.run "CREATE EXTENSION IF NOT EXISTS vector"
93
95
  namespace_column = @namespace_column
94
- vector_dimension = default_dimension
96
+ vector_dimension = llm.default_dimension
95
97
  db.create_table? table_name.to_sym do
96
98
  primary_key :id
97
99
  text :content
@@ -150,3 +152,7 @@ module Langchain::Vectorsearch
150
152
  end
151
153
  end
152
154
  end
155
+
156
+ # Rails connection when configuring vectorsearch
157
+ # Update READMEs
158
+ # Rails migration to create a migration
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Pinecone API.
7
7
  #
8
- # Gem requirements: gem "pinecone", "~> 0.1.6"
8
+ # Gem requirements:
9
+ # gem "pinecone", "~> 0.1.6"
9
10
  #
10
11
  # Usage:
11
- # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Pinecone client
@@ -102,7 +103,7 @@ module Langchain::Vectorsearch
102
103
  client.create_index(
103
104
  metric: DEFAULT_METRIC,
104
105
  name: index_name,
105
- dimension: default_dimension
106
+ dimension: llm.default_dimension
106
107
  )
107
108
  end
108
109
 
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Qdrant
7
7
  #
8
- # Gem requirements: gem "qdrant-ruby", "~> 0.9.3"
8
+ # Gem requirements:
9
+ # gem "qdrant-ruby", "~> 0.9.3"
9
10
  #
10
11
  # Usage:
11
- # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Qdrant client
@@ -82,7 +83,7 @@ module Langchain::Vectorsearch
82
83
  collection_name: index_name,
83
84
  vectors: {
84
85
  distance: DEFAULT_METRIC.capitalize,
85
- size: default_dimension
86
+ size: llm.default_dimension
86
87
  }
87
88
  )
88
89
  end
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
8
+ # Gem requirements:
9
+ # gem "weaviate-ruby", "~> 0.8.3"
9
10
  #
10
11
  # Usage:
11
- # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Weaviate adapter
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.18"
4
+ VERSION = "0.6.19"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -89,3 +89,5 @@ module Langchain
89
89
  class BaseError < StandardError; end
90
90
  end
91
91
  end
92
+
93
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.18
4
+ version: 0.6.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-17 00:00:00.000000000 Z
11
+ date: 2023-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -220,6 +220,20 @@ dependencies:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
222
  version: 0.8.0
223
+ - !ruby/object:Gem::Dependency
224
+ name: elasticsearch
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - "~>"
228
+ - !ruby/object:Gem::Version
229
+ version: 8.2.0
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - "~>"
235
+ - !ruby/object:Gem::Version
236
+ version: 8.2.0
223
237
  - !ruby/object:Gem::Dependency
224
238
  name: eqn
225
239
  requirement: !ruby/object:Gem::Requirement
@@ -603,6 +617,7 @@ files:
603
617
  - lib/langchain/utils/token_length/token_limit_exceeded.rb
604
618
  - lib/langchain/vectorsearch/base.rb
605
619
  - lib/langchain/vectorsearch/chroma.rb
620
+ - lib/langchain/vectorsearch/elasticsearch.rb
606
621
  - lib/langchain/vectorsearch/hnswlib.rb
607
622
  - lib/langchain/vectorsearch/milvus.rb
608
623
  - lib/langchain/vectorsearch/pgvector.rb