langchainrb 0.3.12 → 0.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
4
- data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
3
+ metadata.gz: 2ee811b2bac8fadea4d90c4212363a901829a4aac219da0f2a2dcbe7c6f59c5b
4
+ data.tar.gz: 8fa32e6df4aaf69cb6d29977913c1b8a30d6f65b777b1f90c8a7f504d869ca8f
5
5
  SHA512:
6
- metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
7
- data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4
6
+ metadata.gz: cbb7e0c975333248c01082a47f7096fb9d6807c3b7619424eb9348238008d7b4257518287d9358114bf4e3a589349520ebf71ace00bf1fe8906afd27e8b1418a
7
+ data.tar.gz: 759444abe0b17518c6ef31fed6980f6bc0d3d096606860c4d6fddb8baeda4e0a23fc3909e42eba0f32912a786abec76cac54384533db2787e05d741f0907fa1d
data/.env.example CHANGED
@@ -11,4 +11,5 @@ QDRANT_API_KEY=
11
11
  QDRANT_URL=
12
12
  SERPAPI_API_KEY=
13
13
  WEAVIATE_API_KEY=
14
- WEAVIATE_URL=
14
+ WEAVIATE_URL=
15
+ POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.13] - 2023-05-26
4
+ - 🔍 Vectorsearch
5
+ - Pgvector support
6
+ - 🚚 Loaders
7
+ - CSV loader
8
+ - JSON loader
9
+ - JSONL loader
10
+
3
11
  ## [0.3.12] - 2023-05-25
4
12
  - 🔍 Vectorsearch
5
13
  - Introduce namespace support for Pinecone
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.12)
4
+ langchainrb (0.3.13)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -148,9 +148,13 @@ GEM
148
148
  milvus (0.9.1)
149
149
  faraday (~> 1)
150
150
  mini_mime (1.1.2)
151
+ mini_portile2 (2.8.2)
151
152
  minitest (5.18.0)
152
153
  multi_xml (0.6.0)
153
154
  multipart-post (2.3.0)
155
+ nokogiri (1.14.3)
156
+ mini_portile2 (~> 2.8.0)
157
+ racc (~> 1.4)
154
158
  nokogiri (1.14.3-arm64-darwin)
155
159
  racc (~> 1.4)
156
160
  nokogiri (1.14.3-x86_64-darwin)
@@ -166,6 +170,8 @@ GEM
166
170
  hashery (~> 2.0)
167
171
  ruby-rc4
168
172
  ttfunk
173
+ pg (1.5.3)
174
+ pgvector (0.1.1)
169
175
  pinecone (0.1.71)
170
176
  dry-struct (~> 1.6.0)
171
177
  dry-validation (~> 1.10.0)
@@ -273,6 +279,7 @@ GEM
273
279
  PLATFORMS
274
280
  arm64-darwin-21
275
281
  arm64-darwin-22
282
+ ruby
276
283
  x86_64-darwin-19
277
284
  x86_64-darwin-22
278
285
  x86_64-linux
@@ -290,6 +297,8 @@ DEPENDENCIES
290
297
  milvus (~> 0.9.0)
291
298
  nokogiri (~> 1.13)
292
299
  pdf-reader (~> 1.4)
300
+ pg (~> 1.5)
301
+ pgvector (< 0.2)
293
302
  pinecone (~> 0.1.6)
294
303
  pry-byebug (~> 3.10.0)
295
304
  qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -284,12 +284,16 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
284
284
 
285
285
  ##### Supported Formats
286
286
 
287
- | Format | Pocessor | Gem Requirements |
288
- | ------ | ---------------- | :--------------------------: |
289
- | docx | Processors::Docx | `gem "docx", "~> 0.8.0"` |
290
- | html | Processors::HTML | `gem "nokogiri", "~> 1.13"` |
291
- | pdf | Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
292
- | text | Processors::Text | |
287
+
288
+ | Format | Processor | Gem Requirements |
289
+ | ------ | ---------------------------- | :--------------------------: |
290
+ | docx | Langchain::Processors::Docx | `gem "docx", "~> 0.8.0"` |
291
+ | html | Langchain::Processors::HTML | `gem "nokogiri", "~> 1.13"` |
292
+ | pdf | Langchain::Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
293
+ | text | Langchain::Processors::Text | |
294
+ | JSON | Langchain::Processors::JSON | |
295
+ | JSONL | Langchain::Processors::JSONL | |
296
+ | csv | Langchain::Processors::CSV | |
293
297
 
294
298
  ## Examples
295
299
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Langchain
6
+ module Processors
7
+ class CSV < Base
8
+ EXTENSIONS = [".csv"]
9
+ CONTENT_TYPES = ["text/csv"]
10
+
11
+ # Parse the document and return the text
12
+ # @param [File] data
13
+ # @return [Array<Array<String>>] one array of stripped cell values per CSV row
14
+ def parse(data)
15
+ ::CSV.new(data.read).map do |row|
16
+ row.map(&:strip)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSON < Base
6
+ EXTENSIONS = [".json"]
7
+ CONTENT_TYPES = ["application/json"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [Hash, Array] the parsed JSON document
12
+ def parse(data)
13
+ ::JSON.parse(data.read)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSONL < Base
6
+ EXTENSIONS = [".jsonl"]
7
+ CONTENT_TYPES = ["application/jsonl", "application/json-lines", "application/jsonlines"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [Array of Hash]
12
+ def parse(data)
13
+ data.read.lines.map do |line|
14
+ ::JSON.parse(line)
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
data/lib/langchain.rb CHANGED
@@ -20,10 +20,13 @@ module Langchain
20
20
 
21
21
  module Processors
22
22
  autoload :Base, "langchain/processors/base"
23
- autoload :PDF, "langchain/processors/pdf"
23
+ autoload :CSV, "langchain/processors/csv"
24
+ autoload :Docx, "langchain/processors/docx"
24
25
  autoload :HTML, "langchain/processors/html"
26
+ autoload :JSON, "langchain/processors/json"
27
+ autoload :JSONL, "langchain/processors/jsonl"
28
+ autoload :PDF, "langchain/processors/pdf"
25
29
  autoload :Text, "langchain/processors/text"
26
- autoload :Docx, "langchain/processors/docx"
27
30
  end
28
31
  end
29
32
 
@@ -37,6 +40,7 @@ module Vectorsearch
37
40
  autoload :Chroma, "vectorsearch/chroma"
38
41
  autoload :Milvus, "vectorsearch/milvus"
39
42
  autoload :Pinecone, "vectorsearch/pinecone"
43
+ autoload :Pgvector, "vectorsearch/pgvector"
40
44
  autoload :Qdrant, "vectorsearch/qdrant"
41
45
  autoload :Weaviate, "vectorsearch/weaviate"
42
46
  end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ # The PostgreSQL vector search adapter
5
+ class Pgvector < Base
6
+ # @param url [String] The URL of the PostgreSQL database
7
+ # @param index_name [String] The name of the table to use for the index
8
+ # @param llm [String] The URL of the Language Layer API
9
+ # @param llm_api_key [String] The API key for the Language Layer API
10
+ # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ require "pg"
13
+ require "pgvector"
14
+
15
+ @client = ::PG.connect(url)
16
+ registry = ::PG::BasicTypeRegistry.new.define_default_types
17
+ ::Pgvector::PG.register_vector(registry)
18
+ @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
19
+
20
+ @index_name = index_name
21
+
22
+ super(llm: llm, llm_api_key: llm_api_key)
23
+ end
24
+
25
+ # Add a list of texts to the index
26
+ # @param texts [Array<String>] The texts to add to the index
27
+ # @return [PG::Result] The response from the database
28
+ def add_texts(texts:)
29
+ data = texts.flat_map do |text|
30
+ [text, llm_client.embed(text: text)]
31
+ end
32
+ values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
33
+ client.exec_params(
34
+ "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
35
+ data
36
+ )
37
+ end
38
+
39
+ # Create default schema
40
+ # @return [PG::Result] The response from the database
41
+ def create_default_schema
42
+ client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
43
+ client.exec(
44
+ <<~SQL
45
+ CREATE TABLE IF NOT EXISTS #{@index_name} (
46
+ id serial PRIMARY KEY,
47
+ content TEXT,
48
+ vectors VECTOR(#{default_dimension})
49
+ );
50
+ SQL
51
+ )
52
+ end
53
+
54
+ # Search for similar texts in the index
55
+ # @param query [String] The text to search for
56
+ # @param k [Integer] The number of top results to return
57
+ # @return [Array<Hash>] The results of the search
58
+ def similarity_search(query:, k: 4)
59
+ embedding = llm_client.embed(text: query)
60
+
61
+ similarity_search_by_vector(
62
+ embedding: embedding,
63
+ k: k
64
+ )
65
+ end
66
+
67
+ # Search for similar texts in the index by the passed in vector.
68
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
69
+ # @param embedding [Array<Float>] The vector to search for
70
+ # @param k [Integer] The number of top results to return
71
+ # @return [Array<Hash>] The results of the search
72
+ def similarity_search_by_vector(embedding:, k: 4)
73
+ result = client.transaction do |conn|
74
+ conn.exec("SET LOCAL ivfflat.probes = 10;")
75
+ query = <<~SQL
76
+ SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
77
+ SQL
78
+ conn.exec_params(query, [embedding, k])
79
+ end
80
+
81
+ result.to_a
82
+ end
83
+
84
+ # Ask a question and return the answer
85
+ # @param question [String] The question to ask
86
+ # @return [String] The answer to the question
87
+ def ask(question:)
88
+ search_results = similarity_search(query: question)
89
+
90
+ context = search_results.map do |result|
91
+ result["content"].to_s
92
+ end
93
+ context = context.join("\n---\n")
94
+
95
+ prompt = generate_prompt(question: question, context: context)
96
+
97
+ llm_client.chat(prompt: prompt)
98
+ end
99
+ end
100
+ end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.12"
4
+ VERSION = "0.3.13"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.12
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-25 00:00:00.000000000 Z
11
+ date: 2023-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -164,6 +164,34 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '1.13'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pg
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: pgvector
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "<"
186
+ - !ruby/object:Gem::Version
187
+ version: '0.2'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "<"
193
+ - !ruby/object:Gem::Version
194
+ version: '0.2'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: pdf-reader
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -290,8 +318,11 @@ files:
290
318
  - lib/langchain.rb
291
319
  - lib/langchain/loader.rb
292
320
  - lib/langchain/processors/base.rb
321
+ - lib/langchain/processors/csv.rb
293
322
  - lib/langchain/processors/docx.rb
294
323
  - lib/langchain/processors/html.rb
324
+ - lib/langchain/processors/json.rb
325
+ - lib/langchain/processors/jsonl.rb
295
326
  - lib/langchain/processors/pdf.rb
296
327
  - lib/langchain/processors/text.rb
297
328
  - lib/langchainrb.rb
@@ -312,6 +343,7 @@ files:
312
343
  - lib/vectorsearch/base.rb
313
344
  - lib/vectorsearch/chroma.rb
314
345
  - lib/vectorsearch/milvus.rb
346
+ - lib/vectorsearch/pgvector.rb
315
347
  - lib/vectorsearch/pinecone.rb
316
348
  - lib/vectorsearch/qdrant.rb
317
349
  - lib/vectorsearch/weaviate.rb