langchainrb 0.3.12 → 0.3.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
4
- data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
3
+ metadata.gz: 2ee811b2bac8fadea4d90c4212363a901829a4aac219da0f2a2dcbe7c6f59c5b
4
+ data.tar.gz: 8fa32e6df4aaf69cb6d29977913c1b8a30d6f65b777b1f90c8a7f504d869ca8f
5
5
  SHA512:
6
- metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
7
- data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4
6
+ metadata.gz: cbb7e0c975333248c01082a47f7096fb9d6807c3b7619424eb9348238008d7b4257518287d9358114bf4e3a589349520ebf71ace00bf1fe8906afd27e8b1418a
7
+ data.tar.gz: 759444abe0b17518c6ef31fed6980f6bc0d3d096606860c4d6fddb8baeda4e0a23fc3909e42eba0f32912a786abec76cac54384533db2787e05d741f0907fa1d
data/.env.example CHANGED
@@ -11,4 +11,5 @@ QDRANT_API_KEY=
11
11
  QDRANT_URL=
12
12
  SERPAPI_API_KEY=
13
13
  WEAVIATE_API_KEY=
14
- WEAVIATE_URL=
14
+ WEAVIATE_URL=
15
+ POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.13] - 2023-05-26
4
+ - 🔍 Vectorsearch
5
+ - Pgvector support
6
+ - 🚚 Loaders
7
+ - CSV loader
8
+ - JSON loader
9
+ - JSONL loader
10
+
3
11
  ## [0.3.12] - 2023-05-25
4
12
  - 🔍 Vectorsearch
5
13
  - Introduce namespace support for Pinecone
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.12)
4
+ langchainrb (0.3.13)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -148,9 +148,13 @@ GEM
148
148
  milvus (0.9.1)
149
149
  faraday (~> 1)
150
150
  mini_mime (1.1.2)
151
+ mini_portile2 (2.8.2)
151
152
  minitest (5.18.0)
152
153
  multi_xml (0.6.0)
153
154
  multipart-post (2.3.0)
155
+ nokogiri (1.14.3)
156
+ mini_portile2 (~> 2.8.0)
157
+ racc (~> 1.4)
154
158
  nokogiri (1.14.3-arm64-darwin)
155
159
  racc (~> 1.4)
156
160
  nokogiri (1.14.3-x86_64-darwin)
@@ -166,6 +170,8 @@ GEM
166
170
  hashery (~> 2.0)
167
171
  ruby-rc4
168
172
  ttfunk
173
+ pg (1.5.3)
174
+ pgvector (0.1.1)
169
175
  pinecone (0.1.71)
170
176
  dry-struct (~> 1.6.0)
171
177
  dry-validation (~> 1.10.0)
@@ -273,6 +279,7 @@ GEM
273
279
  PLATFORMS
274
280
  arm64-darwin-21
275
281
  arm64-darwin-22
282
+ ruby
276
283
  x86_64-darwin-19
277
284
  x86_64-darwin-22
278
285
  x86_64-linux
@@ -290,6 +297,8 @@ DEPENDENCIES
290
297
  milvus (~> 0.9.0)
291
298
  nokogiri (~> 1.13)
292
299
  pdf-reader (~> 1.4)
300
+ pg (~> 1.5)
301
+ pgvector (< 0.2)
293
302
  pinecone (~> 0.1.6)
294
303
  pry-byebug (~> 3.10.0)
295
304
  qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -284,12 +284,16 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
284
284
 
285
285
  ##### Supported Formats
286
286
 
287
- | Format | Processor | Gem Requirements |
288
- | ------ | ---------------- | :--------------------------: |
289
- | docx | Processors::Docx | `gem "docx", "~> 0.8.0"` |
290
- | html | Processors::HTML | `gem "nokogiri", "~> 1.13"` |
291
- | pdf | Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
292
- | text | Processors::Text | |
287
+
288
+ | Format | Processor | Gem Requirements |
289
+ | ------ | ---------------------------- | :--------------------------: |
290
+ | docx | Langchain::Processors::Docx | `gem "docx", "~> 0.8.0"` |
291
+ | html | Langchain::Processors::HTML | `gem "nokogiri", "~> 1.13"` |
292
+ | pdf | Langchain::Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
293
+ | text | Langchain::Processors::Text | |
294
+ | json | Langchain::Processors::JSON | |
295
+ | jsonl | Langchain::Processors::JSONL | |
296
+ | csv | Langchain::Processors::CSV | |
293
297
 
294
298
  ## Examples
295
299
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Langchain
6
+ module Processors
7
+ class CSV < Base
8
+ EXTENSIONS = [".csv"]
9
+ CONTENT_TYPES = ["text/csv"]
10
+
11
+ # Parse the CSV document and return its rows
12
+ # @param [File] data
13
+ # @return [Array<Array<String>>] One array of stripped field values per CSV row
14
+ def parse(data)
15
+ ::CSV.new(data.read).map do |row|
16
+ row.map(&:strip)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSON < Base
6
+ EXTENSIONS = [".json"]
7
+ CONTENT_TYPES = ["application/json"]
8
+
9
+ # Parse the JSON document and return the resulting data structure
10
+ # @param [File] data
11
+ # @return [Hash, Array] The parsed JSON data (type follows the document's top-level value)
12
+ def parse(data)
13
+ ::JSON.parse(data.read)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSONL < Base
6
+ EXTENSIONS = [".jsonl"]
7
+ CONTENT_TYPES = ["application/jsonl", "application/json-lines", "application/jsonlines"]
8
+
9
+ # Parse the JSONL document and return one parsed value per line
10
+ # @param [File] data
11
+ # @return [Array<Hash>] The parsed JSON value of each line, in file order
12
+ def parse(data)
13
+ data.read.lines.map do |line|
14
+ ::JSON.parse(line)
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
data/lib/langchain.rb CHANGED
@@ -20,10 +20,13 @@ module Langchain
20
20
 
21
21
  module Processors
22
22
  autoload :Base, "langchain/processors/base"
23
- autoload :PDF, "langchain/processors/pdf"
23
+ autoload :CSV, "langchain/processors/csv"
24
+ autoload :Docx, "langchain/processors/docx"
24
25
  autoload :HTML, "langchain/processors/html"
26
+ autoload :JSON, "langchain/processors/json"
27
+ autoload :JSONL, "langchain/processors/jsonl"
28
+ autoload :PDF, "langchain/processors/pdf"
25
29
  autoload :Text, "langchain/processors/text"
26
- autoload :Docx, "langchain/processors/docx"
27
30
  end
28
31
  end
29
32
 
@@ -37,6 +40,7 @@ module Vectorsearch
37
40
  autoload :Chroma, "vectorsearch/chroma"
38
41
  autoload :Milvus, "vectorsearch/milvus"
39
42
  autoload :Pinecone, "vectorsearch/pinecone"
43
+ autoload :Pgvector, "vectorsearch/pgvector"
40
44
  autoload :Qdrant, "vectorsearch/qdrant"
41
45
  autoload :Weaviate, "vectorsearch/weaviate"
42
46
  end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ # The PostgreSQL vector search adapter
5
+ class Pgvector < Base
6
+ # @param url [String] The URL of the PostgreSQL database
7
+ # @param index_name [String] The name of the table to use for the index
8
+ # @param llm [Symbol] The LLM used for embeddings and chat (presumably a provider name such as :openai — confirm against Vectorsearch::Base)
9
+ # @param llm_api_key [String] The API key for the LLM
10
+ # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ require "pg"
13
+ require "pgvector"
14
+
15
+ @client = ::PG.connect(url)
16
+ registry = ::PG::BasicTypeRegistry.new.define_default_types
17
+ ::Pgvector::PG.register_vector(registry)
18
+ @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
19
+
20
+ @index_name = index_name
21
+
22
+ super(llm: llm, llm_api_key: llm_api_key)
23
+ end
24
+
25
+ # Add a list of texts to the index
26
+ # @param texts [Array<String>] The texts to add to the index
27
+ # @return [PG::Result] The response from the database
28
+ def add_texts(texts:)
29
+ data = texts.flat_map do |text|
30
+ [text, llm_client.embed(text: text)]
31
+ end
32
+ values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
33
+ client.exec_params(
34
+ "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
35
+ data
36
+ )
37
+ end
38
+
39
+ # Create default schema
40
+ # @return [PG::Result] The response from the database
41
+ def create_default_schema
42
+ client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
43
+ client.exec(
44
+ <<~SQL
45
+ CREATE TABLE IF NOT EXISTS #{@index_name} (
46
+ id serial PRIMARY KEY,
47
+ content TEXT,
48
+ vectors VECTOR(#{default_dimension})
49
+ );
50
+ SQL
51
+ )
52
+ end
53
+
54
+ # Search for similar texts in the index
55
+ # @param query [String] The text to search for
56
+ # @param k [Integer] The number of top results to return
57
+ # @return [Array<Hash>] The results of the search
58
+ def similarity_search(query:, k: 4)
59
+ embedding = llm_client.embed(text: query)
60
+
61
+ similarity_search_by_vector(
62
+ embedding: embedding,
63
+ k: k
64
+ )
65
+ end
66
+
67
+ # Search for similar texts in the index by the passed in vector.
68
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
69
+ # @param embedding [Array<Float>] The vector to search for
70
+ # @param k [Integer] The number of top results to return
71
+ # @return [Array<Hash>] The results of the search
72
+ def similarity_search_by_vector(embedding:, k: 4)
73
+ result = client.transaction do |conn|
74
+ conn.exec("SET LOCAL ivfflat.probes = 10;")
75
+ query = <<~SQL
76
+ SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
77
+ SQL
78
+ conn.exec_params(query, [embedding, k])
79
+ end
80
+
81
+ result.to_a
82
+ end
83
+
84
+ # Ask a question and return the answer
85
+ # @param question [String] The question to ask
86
+ # @return [String] The answer to the question
87
+ def ask(question:)
88
+ search_results = similarity_search(query: question)
89
+
90
+ context = search_results.map do |result|
91
+ result["content"].to_s
92
+ end
93
+ context = context.join("\n---\n")
94
+
95
+ prompt = generate_prompt(question: question, context: context)
96
+
97
+ llm_client.chat(prompt: prompt)
98
+ end
99
+ end
100
+ end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.12"
4
+ VERSION = "0.3.13"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.12
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-25 00:00:00.000000000 Z
11
+ date: 2023-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -164,6 +164,34 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '1.13'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pg
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: pgvector
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "<"
186
+ - !ruby/object:Gem::Version
187
+ version: '0.2'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "<"
193
+ - !ruby/object:Gem::Version
194
+ version: '0.2'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: pdf-reader
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -290,8 +318,11 @@ files:
290
318
  - lib/langchain.rb
291
319
  - lib/langchain/loader.rb
292
320
  - lib/langchain/processors/base.rb
321
+ - lib/langchain/processors/csv.rb
293
322
  - lib/langchain/processors/docx.rb
294
323
  - lib/langchain/processors/html.rb
324
+ - lib/langchain/processors/json.rb
325
+ - lib/langchain/processors/jsonl.rb
295
326
  - lib/langchain/processors/pdf.rb
296
327
  - lib/langchain/processors/text.rb
297
328
  - lib/langchainrb.rb
@@ -312,6 +343,7 @@ files:
312
343
  - lib/vectorsearch/base.rb
313
344
  - lib/vectorsearch/chroma.rb
314
345
  - lib/vectorsearch/milvus.rb
346
+ - lib/vectorsearch/pgvector.rb
315
347
  - lib/vectorsearch/pinecone.rb
316
348
  - lib/vectorsearch/qdrant.rb
317
349
  - lib/vectorsearch/weaviate.rb