langchainrb 0.6.18 → 0.6.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 437c6387ded139ed1a513414bfb7242cdbadf1ba6526c7a89346aa2fa9490fc2
4
- data.tar.gz: dd6f437a4bbc4807a16631dd790f66c9de4e9456011b2c4f84302fe3fab1377b
3
+ metadata.gz: d7be5e031274fba7a4c0d7fc2cd3f472ed83fb66d8c6b355fb71fbf69a825b73
4
+ data.tar.gz: 745cbc4f3d7b569d2e1407acc8be123f77a0aac2964840d7c3dca215592811ee
5
5
  SHA512:
6
- metadata.gz: 24748539de50dfa816fdb71173ef00a6b04f9737f32926fca919865a49b9812dd9f1fdb286c361c98e33cc994f67e8988ab688bfdf6bf3020d954eb0c791177c
7
- data.tar.gz: 283b10460187cada7485e08a19c89e7485925ab2f73a5ad51b06a72e8fd9ee1600ddac9d000f13c0c1af13f6defece9fdcc272489d0df803f94da96fe1c76cfd
6
+ metadata.gz: e1392abe2fc0c4928593bd77d0e62688e3959ec39fd3f7bb5effc784b47599402c611ecc545868178b5d04ec688d68d6406f220697e8bfe40771cc593292a192
7
+ data.tar.gz: 926bccf20c71af3d31d942cf439336df9edc489a8e5e0359a6c24bb26e5b818be048a7ef63ebcce721bb99392b49e407288ffdd7387dd33d3f0161e92ff6e045
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.19] - 2023-10-18
4
+ - Elasticsearch vector search support
5
+ - Fix `lib/langchain/railtie.rb` not being loaded with the gem
6
+
3
7
  ## [0.6.18] - 2023-10-16
4
8
  - Introduce `Langchain::LLM::Response` object
5
9
  - Introduce `Langchain::Chunk` object
data/README.md CHANGED
@@ -19,11 +19,11 @@ Langchain.rb is a library that's an abstraction layer on top many emergent AI, M
19
19
 
20
20
  Install the gem and add to the application's Gemfile by executing:
21
21
 
22
- $ bundle add langchainrb
22
+ bundle add langchainrb
23
23
 
24
24
  If bundler is not being used to manage dependencies, install the gem by executing:
25
25
 
26
- $ gem install langchainrb
26
+ gem install langchainrb
27
27
 
28
28
  ## Usage
29
29
 
@@ -37,7 +37,7 @@ require "langchain"
37
37
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
38
38
  | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
- | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
+ | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
41
41
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
42
  | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
43
43
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "forwardable"
4
-
5
3
  module Langchain::Vectorsearch
6
4
  # = Vector Databases
7
5
  # A vector database is a type of database that stores data as high-dimensional vectors, which are mathematical representations of features or attributes. Each vector has a certain number of dimensions, which can range from tens to thousands, depending on the complexity and granularity of the data.
@@ -9,11 +7,13 @@ module Langchain::Vectorsearch
9
7
  # == Available vector databases
10
8
  #
11
9
  # - {Langchain::Vectorsearch::Chroma}
10
+ # - {Langchain::Vectorsearch::Elasticsearch}
11
+ # - {Langchain::Vectorsearch::Hnswlib}
12
12
  # - {Langchain::Vectorsearch::Milvus}
13
+ # - {Langchain::Vectorsearch::Pgvector}
13
14
  # - {Langchain::Vectorsearch::Pinecone}
14
15
  # - {Langchain::Vectorsearch::Qdrant}
15
16
  # - {Langchain::Vectorsearch::Weaviate}
16
- # - {Langchain::Vectorsearch::Pgvector}
17
17
  #
18
18
  # == Usage
19
19
  #
@@ -150,9 +150,6 @@ module Langchain::Vectorsearch
150
150
  raise NotImplementedError, "#{self.class.name} does not support asking questions"
151
151
  end
152
152
 
153
- def_delegators :llm,
154
- :default_dimension
155
-
156
153
  # HyDE-style prompt
157
154
  #
158
155
  # @param [String] User's question
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Chroma DB
7
7
  #
8
- # Gem requirements: gem "chroma-db", "~> 0.6.0"
8
+ # Gem requirements:
9
+ # gem "chroma-db", "~> 0.6.0"
9
10
  #
10
11
  # Usage:
11
12
  # chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, llm_api_key:, api_key: nil)
@@ -29,14 +30,15 @@ module Langchain::Vectorsearch
29
30
 
30
31
  # Add a list of texts to the index
31
32
  # @param texts [Array<String>] The list of texts to add
33
+ # @param ids [Array<String>] The list of ids to use for the texts (optional)
34
+ # @param metadatas [Array<Hash>] The list of metadata to use for the texts (optional)
32
35
  # @return [Hash] The response from the server
33
- def add_texts(texts:, ids: [])
36
+ def add_texts(texts:, ids: [], metadatas: [])
34
37
  embeddings = Array(texts).map.with_index do |text, i|
35
38
  ::Chroma::Resources::Embedding.new(
36
39
  id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
37
40
  embedding: llm.embed(text: text).embedding,
38
- # TODO: Add support for passing metadata
39
- metadata: {}, # metadatas[index],
41
+ metadata: metadatas[i] || {},
40
42
  document: text # Do we actually need to store the whole original document?
41
43
  )
42
44
  end
@@ -45,13 +47,12 @@ module Langchain::Vectorsearch
45
47
  collection.add(embeddings)
46
48
  end
47
49
 
48
- def update_texts(texts:, ids:)
50
+ def update_texts(texts:, ids:, metadatas: [])
49
51
  embeddings = Array(texts).map.with_index do |text, i|
50
52
  ::Chroma::Resources::Embedding.new(
51
53
  id: ids[i].to_s,
52
54
  embedding: llm.embed(text: text).embedding,
53
- # TODO: Add support for passing metadata
54
- metadata: [], # metadatas[index],
55
+ metadata: metadatas[i] || {},
55
56
  document: text # Do we actually need to store the whole original document?
56
57
  )
57
58
  end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::Vectorsearch
4
+ class Elasticsearch < Base
5
+ #
6
+ # Wrapper around Elasticsearch vector search capabilities.
7
+ #
8
+ # Setting up Elasticsearch:
9
+ # 1. Get Elasticsearch up and running with Docker: https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
10
+ # 2. Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
11
+ # 3. Set the ELASTICSEARCH_URL environment variable
12
+ #
13
+ # Gem requirements:
14
+ # gem "elasticsearch", "~> 8.2.0"
15
+ #
16
+ # Usage:
17
+ # llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
18
+ # es = Langchain::Vectorsearch::Elasticsearch.new(
19
+ # url: ENV["ELASTICSEARCH_URL"],
20
+ # index_name: "docs",
21
+ # llm: llm,
22
+ # es_options: {
23
+ # transport_options: {ssl: {verify: false}},
24
+ # ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
25
+ # }
26
+ # )
27
+ #
28
+ # es.create_default_schema
29
+ # es.add_texts(texts: ["..."])
30
+ # es.similarity_search(text: "...")
31
+ #
32
+ attr_accessor :es_client, :index_name, :options
33
+
34
+ def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
35
+ require "elasticsearch"
36
+
37
+ @options = {
38
+ url: url,
39
+ request_timeout: 20,
40
+ log: false
41
+ }.merge(es_options)
42
+
43
+ @es_client = ::Elasticsearch::Client.new(**options)
44
+ @index_name = index_name
45
+
46
+ super(llm: llm)
47
+ end
48
+
49
+ def add_texts(texts: [])
50
+ body = texts.map do |text|
51
+ [
52
+ {index: {_index: index_name}},
53
+ {input: text, input_vector: llm.embed(text: text).embedding}
54
+ ]
55
+ end.flatten
56
+
57
+ es_client.bulk(body: body)
58
+ end
59
+
60
+ def update_texts(texts: [], ids: [])
61
+ body = texts.map.with_index do |text, i|
62
+ [
63
+ {index: {_index: index_name, _id: ids[i]}},
64
+ {input: text, input_vector: llm.embed(text: text).embedding}
65
+ ]
66
+ end.flatten
67
+
68
+ es_client.bulk(body: body)
69
+ end
70
+
71
+ def create_default_schema
72
+ es_client.indices.create(
73
+ index: index_name,
74
+ body: default_schema
75
+ )
76
+ end
77
+
78
+ def delete_default_schema
79
+ es_client.indices.delete(
80
+ index: index_name
81
+ )
82
+ end
83
+
84
+ def default_vector_settings
85
+ {type: "dense_vector", dims: llm.default_dimension}
86
+ end
87
+
88
+ def vector_settings
89
+ options[:vector_settings] || default_vector_settings
90
+ end
91
+
92
+ def default_schema
93
+ {
94
+ mappings: {
95
+ properties: {
96
+ input: {
97
+ type: "text"
98
+ },
99
+ input_vector: vector_settings
100
+ }
101
+ }
102
+ }
103
+ end
104
+
105
+ def default_query(query_vector)
106
+ {
107
+ script_score: {
108
+ query: {match_all: {}},
109
+ script: {
110
+ source: "cosineSimilarity(params.query_vector, 'input_vector') + 1.0",
111
+ params: {
112
+ query_vector: query_vector
113
+ }
114
+ }
115
+ }
116
+ }
117
+ end
118
+
119
+ # TODO: Implement this
120
+ # def ask()
121
+ # end
122
+
123
+ def similarity_search(text: "", k: 10, query: {})
124
+ if text.empty? && query.empty?
125
+ raise "Either text or query should pass as an argument"
126
+ end
127
+
128
+ if query.empty?
129
+ query_vector = llm.embed(text: text).embedding
130
+
131
+ query = default_query(query_vector)
132
+ end
133
+
134
+ es_client.search(body: {query: query, size: k}).body
135
+ end
136
+
137
+ def similarity_search_by_vector(embedding: [], k: 10, query: {})
138
+ if embedding.empty? && query.empty?
139
+ raise "Either embedding or query should pass as an argument"
140
+ end
141
+
142
+ query = default_query(embedding) if query.empty?
143
+
144
+ es_client.search(body: {query: query, size: k}).body
145
+ end
146
+ end
147
+ end
@@ -5,7 +5,8 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Milvus REST APIs.
7
7
  #
8
- # Gem requirements: gem "milvus", "~> 0.9.2"
8
+ # Gem requirements:
9
+ # gem "milvus", "~> 0.9.2"
9
10
  #
10
11
  # Usage:
11
12
  # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
@@ -70,7 +71,7 @@ module Langchain::Vectorsearch
70
71
  type_params: [
71
72
  {
72
73
  key: "dim",
73
- value: default_dimension.to_s
74
+ value: llm.default_dimension.to_s
74
75
  }
75
76
  ]
76
77
  }
@@ -5,10 +5,12 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # The PostgreSQL vector search adapter
7
7
  #
8
- # Gem requirements: gem "pgvector", "~> 0.2"
8
+ # Gem requirements:
9
+ # gem "sequel", "~> 5.68.0"
10
+ # gem "pgvector", "~> 0.2"
9
11
  #
10
12
  # Usage:
11
- # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
13
+ # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
12
14
  #
13
15
 
14
16
  # The operators supported by the PostgreSQL vector search adapter
@@ -91,7 +93,7 @@ module Langchain::Vectorsearch
91
93
  def create_default_schema
92
94
  db.run "CREATE EXTENSION IF NOT EXISTS vector"
93
95
  namespace_column = @namespace_column
94
- vector_dimension = default_dimension
96
+ vector_dimension = llm.default_dimension
95
97
  db.create_table? table_name.to_sym do
96
98
  primary_key :id
97
99
  text :content
@@ -150,3 +152,7 @@ module Langchain::Vectorsearch
150
152
  end
151
153
  end
152
154
  end
155
+
156
+ # Rails connection when configuring vectorsearch
157
+ # Update READMEs
158
+ # Rails migration to create a migration
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Pinecone API.
7
7
  #
8
- # Gem requirements: gem "pinecone", "~> 0.1.6"
8
+ # Gem requirements:
9
+ # gem "pinecone", "~> 0.1.6"
9
10
  #
10
11
  # Usage:
11
- # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Pinecone client
@@ -102,7 +103,7 @@ module Langchain::Vectorsearch
102
103
  client.create_index(
103
104
  metric: DEFAULT_METRIC,
104
105
  name: index_name,
105
- dimension: default_dimension
106
+ dimension: llm.default_dimension
106
107
  )
107
108
  end
108
109
 
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Qdrant
7
7
  #
8
- # Gem requirements: gem "qdrant-ruby", "~> 0.9.3"
8
+ # Gem requirements:
9
+ # gem "qdrant-ruby", "~> 0.9.3"
9
10
  #
10
11
  # Usage:
11
- # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Qdrant client
@@ -82,7 +83,7 @@ module Langchain::Vectorsearch
82
83
  collection_name: index_name,
83
84
  vectors: {
84
85
  distance: DEFAULT_METRIC.capitalize,
85
- size: default_dimension
86
+ size: llm.default_dimension
86
87
  }
87
88
  )
88
89
  end
@@ -5,10 +5,11 @@ module Langchain::Vectorsearch
5
5
  #
6
6
  # Wrapper around Weaviate
7
7
  #
8
- # Gem requirements: gem "weaviate-ruby", "~> 0.8.3"
8
+ # Gem requirements:
9
+ # gem "weaviate-ruby", "~> 0.8.3"
9
10
  #
10
11
  # Usage:
11
- # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
+ # weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:, llm_api_key:)
12
13
  #
13
14
 
14
15
  # Initialize the Weaviate adapter
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.18"
4
+ VERSION = "0.6.19"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -89,3 +89,5 @@ module Langchain
89
89
  class BaseError < StandardError; end
90
90
  end
91
91
  end
92
+
93
+ require "langchain/railtie" if defined?(Rails)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.18
4
+ version: 0.6.19
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-17 00:00:00.000000000 Z
11
+ date: 2023-10-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -220,6 +220,20 @@ dependencies:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
222
  version: 0.8.0
223
+ - !ruby/object:Gem::Dependency
224
+ name: elasticsearch
225
+ requirement: !ruby/object:Gem::Requirement
226
+ requirements:
227
+ - - "~>"
228
+ - !ruby/object:Gem::Version
229
+ version: 8.2.0
230
+ type: :development
231
+ prerelease: false
232
+ version_requirements: !ruby/object:Gem::Requirement
233
+ requirements:
234
+ - - "~>"
235
+ - !ruby/object:Gem::Version
236
+ version: 8.2.0
223
237
  - !ruby/object:Gem::Dependency
224
238
  name: eqn
225
239
  requirement: !ruby/object:Gem::Requirement
@@ -603,6 +617,7 @@ files:
603
617
  - lib/langchain/utils/token_length/token_limit_exceeded.rb
604
618
  - lib/langchain/vectorsearch/base.rb
605
619
  - lib/langchain/vectorsearch/chroma.rb
620
+ - lib/langchain/vectorsearch/elasticsearch.rb
606
621
  - lib/langchain/vectorsearch/hnswlib.rb
607
622
  - lib/langchain/vectorsearch/milvus.rb
608
623
  - lib/langchain/vectorsearch/pgvector.rb