langchainrb 0.3.11 → 0.3.13

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ff7f99d961b09e827df297ddb3144821c9103fd40eabb32688ca92588a73415c
4
- data.tar.gz: bb83eaa99055cf45cceaccb18a84e9fd4ee3ea4a93a6a0c66e04ede43e5d4bc0
3
+ metadata.gz: 2ee811b2bac8fadea4d90c4212363a901829a4aac219da0f2a2dcbe7c6f59c5b
4
+ data.tar.gz: 8fa32e6df4aaf69cb6d29977913c1b8a30d6f65b777b1f90c8a7f504d869ca8f
5
5
  SHA512:
6
- metadata.gz: 40e5362520220d3ffc1b4c29c3e430b051de334c2f281d9cb7d7549a93be40b26b379dbd35d0c91ccb5010c1a495a653e31768f1b7a95bc087059d59339fd1a7
7
- data.tar.gz: 04f24944b590ee8b577419a95718ad6796bc4cdc34d52cf05e287806912c05ba8ace22e07181a8537892124881b65a8a221e1c75d8c6245231dd5660c6b4308c
6
+ metadata.gz: cbb7e0c975333248c01082a47f7096fb9d6807c3b7619424eb9348238008d7b4257518287d9358114bf4e3a589349520ebf71ace00bf1fe8906afd27e8b1418a
7
+ data.tar.gz: 759444abe0b17518c6ef31fed6980f6bc0d3d096606860c4d6fddb8baeda4e0a23fc3909e42eba0f32912a786abec76cac54384533db2787e05d741f0907fa1d
data/.env.example CHANGED
@@ -11,4 +11,5 @@ QDRANT_API_KEY=
11
11
  QDRANT_URL=
12
12
  SERPAPI_API_KEY=
13
13
  WEAVIATE_API_KEY=
14
- WEAVIATE_URL=
14
+ WEAVIATE_URL=
15
+ POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.13] - 2023-05-26
4
+ - 🔍 Vectorsearch
5
+ - Pgvector support
6
+ - 🚚 Loaders
7
+ - CSV loader
8
+ - JSON loader
9
+ - JSONL loader
10
+
11
+ ## [0.3.12] - 2023-05-25
12
+ - 🔍 Vectorsearch
13
+ - Introduce namespace support for Pinecone
14
+ - 🚚 Loaders
15
+ - Loaders overhaul
16
+
3
17
  ## [0.3.11] - 2023-05-23
4
18
  - 🗣️ LLMs
5
19
  - Introducing support for Google PaLM (Pathways Language Model)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.11)
4
+ langchainrb (0.3.13)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -148,9 +148,13 @@ GEM
148
148
  milvus (0.9.1)
149
149
  faraday (~> 1)
150
150
  mini_mime (1.1.2)
151
+ mini_portile2 (2.8.2)
151
152
  minitest (5.18.0)
152
153
  multi_xml (0.6.0)
153
154
  multipart-post (2.3.0)
155
+ nokogiri (1.14.3)
156
+ mini_portile2 (~> 2.8.0)
157
+ racc (~> 1.4)
154
158
  nokogiri (1.14.3-arm64-darwin)
155
159
  racc (~> 1.4)
156
160
  nokogiri (1.14.3-x86_64-darwin)
@@ -166,6 +170,8 @@ GEM
166
170
  hashery (~> 2.0)
167
171
  ruby-rc4
168
172
  ttfunk
173
+ pg (1.5.3)
174
+ pgvector (0.1.1)
169
175
  pinecone (0.1.71)
170
176
  dry-struct (~> 1.6.0)
171
177
  dry-validation (~> 1.10.0)
@@ -271,7 +277,9 @@ GEM
271
277
  zeitwerk (2.6.8)
272
278
 
273
279
  PLATFORMS
280
+ arm64-darwin-21
274
281
  arm64-darwin-22
282
+ ruby
275
283
  x86_64-darwin-19
276
284
  x86_64-darwin-22
277
285
  x86_64-linux
@@ -289,6 +297,8 @@ DEPENDENCIES
289
297
  milvus (~> 0.9.0)
290
298
  nokogiri (~> 1.13)
291
299
  pdf-reader (~> 1.4)
300
+ pg (~> 1.5)
301
+ pgvector (< 0.2)
292
302
  pinecone (~> 0.1.6)
293
303
  pry-byebug (~> 3.10.0)
294
304
  qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -268,12 +268,32 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
268
268
 
269
269
  Need to read data from various sources? Load it up.
270
270
 
271
- | Name | Class | Gem Requirements |
272
- | ---- | ------------- | :--------------------------: |
273
- | docx | Loaders::Docx | `gem "docx", "~> 0.8.0"` |
274
- | html | Loaders::HTML | `gem "nokogiri", "~> 1.13"` |
275
- | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
276
- | text | Loaders::Text | |
271
+ ##### Usage
272
+
273
+ Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
274
+
275
+ ```ruby
276
+ Langchain::Loader.load('/path/to/file.pdf')
277
+ ```
278
+
279
+ or
280
+
281
+ ```ruby
282
+ Langchain::Loader.load('https://www.example.com/file.pdf')
283
+ ```
284
+
285
+ ##### Supported Formats
286
+
287
+
288
+ | Format | Processor | Gem Requirements |
289
+ | ------ | ---------------------------- | :--------------------------: |
290
+ | docx | Langchain::Processors::Docx | `gem "docx", "~> 0.8.0"` |
291
+ | html | Langchain::Processors::HTML | `gem "nokogiri", "~> 1.13"` |
292
+ | pdf | Langchain::Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
293
+ | text | Langchain::Processors::Text | |
294
+ | JSON | Langchain::Processors::JSON | |
295
+ | JSONL | Langchain::Processors::JSONL | |
296
+ | csv | Langchain::Processors::CSV | |
277
297
 
278
298
  ## Examples
279
299
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -26,8 +26,8 @@ docs = [
26
26
  ]
27
27
 
28
28
  # Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scenes.
29
- chroma.add_texts(
30
- texts: docs
29
+ chroma.add_data(
30
+ paths: docs
31
31
  )
32
32
 
33
33
  # Query your data
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open-uri"
4
+
5
+ module Langchain
6
+ class Loader
7
+ class FileNotFound < StandardError; end
8
+
9
+ class UnknownFormatError < StandardError; end
10
+
11
+ URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
12
+
13
+ # Load data from a file or url
14
+ # Equivalent to Langchain::Loader.new(path).load
15
+ # @param path [String | Pathname] path to file or url
16
+ # @return [String] file content
17
+ def self.load(path)
18
+ new(path).load
19
+ end
20
+
21
+ # Initialize Langchain::Loader
22
+ # @param path [String | Pathname] path to file or url
23
+ # @return [Langchain::Loader] loader instance
24
+ def initialize(path)
25
+ @path = path
26
+ end
27
+
28
+ # Check if path is url
29
+ # @return [Boolean] true if path is url
30
+ def url?
31
+ return false if @path.is_a?(Pathname)
32
+
33
+ !!(@path =~ URI_REGEX)
34
+ end
35
+
36
+ # Load data from a file or url
37
+ # @return [String] file content
38
+ def load
39
+ url? ? from_url(@path) : from_path(@path)
40
+ end
41
+
42
+ private
43
+
44
+ def from_url(url)
45
+ process do
46
+ data = URI.parse(url).open
47
+ processor = find_processor(:CONTENT_TYPES, data.content_type)
48
+ [data, processor]
49
+ end
50
+ end
51
+
52
+ def from_path(path)
53
+ raise FileNotFound unless File.exist?(path)
54
+
55
+ process do
56
+ [File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
57
+ end
58
+ end
59
+
60
+ def process(&block)
61
+ data, processor = yield
62
+
63
+ raise UnknownFormatError unless processor
64
+
65
+ Langchain::Processors.const_get(processor).new.parse(data)
66
+ end
67
+
68
+ def find_processor(constant, value)
69
+ processors.find { |klass| processor_matches? "#{klass}::#{constant}", value }
70
+ end
71
+
72
+ def processor_matches?(constant, value)
73
+ Langchain::Processors.const_get(constant).include?(value)
74
+ end
75
+
76
+ def processors
77
+ Langchain::Processors.constants
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Base
6
+ EXTENSIONS = []
7
+ CONTENT_TYPES = []
8
+
9
+ def parse(data)
10
+ raise NotImplementedError
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "csv"
4
+
5
+ module Langchain
6
+ module Processors
7
+ class CSV < Base
8
+ EXTENSIONS = [".csv"]
9
+ CONTENT_TYPES = ["text/csv"]
10
+
11
+ # Parse the document and return the text
12
+ # @param [File] data
13
+ # @return [Array of Hash]
14
+ def parse(data)
15
+ ::CSV.new(data.read).map do |row|
16
+ row.map(&:strip)
17
+ end
18
+ end
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Docx < Base
6
+ EXTENSIONS = [".docx"]
7
+ CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
8
+
9
+ def initialize
10
+ depends_on "docx"
11
+ require "docx"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [String]
17
+ def parse(data)
18
+ ::Docx::Document
19
+ .open(StringIO.new(data.read))
20
+ .text
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class HTML < Base
6
+ EXTENSIONS = [".html", ".htm"]
7
+ CONTENT_TYPES = ["text/html"]
8
+
9
+ # We only look for headings and paragraphs
10
+ TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
11
+
12
+ def initialize
13
+ depends_on "nokogiri"
14
+ require "nokogiri"
15
+ end
16
+
17
+ # Parse the document and return the text
18
+ # @param [File] data
19
+ # @return [String]
20
+ def parse(data)
21
+ Nokogiri::HTML(data.read)
22
+ .css(TEXT_CONTENT_TAGS.join(","))
23
+ .map(&:inner_text)
24
+ .join("\n\n")
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSON < Base
6
+ EXTENSIONS = [".json"]
7
+ CONTENT_TYPES = ["application/json"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [Hash]
12
+ def parse(data)
13
+ ::JSON.parse(data.read)
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,19 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class JSONL < Base
6
+ EXTENSIONS = [".jsonl"]
7
+ CONTENT_TYPES = ["application/jsonl", "application/json-lines", "application/jsonlines"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [Array of Hash]
12
+ def parse(data)
13
+ data.read.lines.map do |line|
14
+ ::JSON.parse(line)
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class PDF < Base
6
+ EXTENSIONS = [".pdf"]
7
+ CONTENT_TYPES = ["application/pdf"]
8
+
9
+ def initialize
10
+ depends_on "pdf-reader"
11
+ require "pdf-reader"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [String]
17
+ def parse(data)
18
+ ::PDF::Reader
19
+ .new(StringIO.new(data.read))
20
+ .pages
21
+ .map(&:text)
22
+ .join("\n\n")
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Text < Base
6
+ EXTENSIONS = [".txt"]
7
+ CONTENT_TYPES = ["text/plain"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [String]
12
+ def parse(data)
13
+ data.read
14
+ end
15
+ end
16
+ end
17
+ end
data/lib/langchain.rb CHANGED
@@ -7,7 +7,6 @@ require_relative "./version"
7
7
  require_relative "./dependency_helper"
8
8
  module Langchain
9
9
  class << self
10
- attr_accessor :default_loaders
11
10
  attr_accessor :logger
12
11
 
13
12
  attr_reader :root
@@ -16,6 +15,19 @@ module Langchain
16
15
  @logger ||= ::Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
17
16
 
18
17
  @root = Pathname.new(__dir__)
18
+
19
+ autoload :Loader, "langchain/loader"
20
+
21
+ module Processors
22
+ autoload :Base, "langchain/processors/base"
23
+ autoload :CSV, "langchain/processors/csv"
24
+ autoload :Docx, "langchain/processors/docx"
25
+ autoload :HTML, "langchain/processors/html"
26
+ autoload :JSON, "langchain/processors/json"
27
+ autoload :JSONL, "langchain/processors/jsonl"
28
+ autoload :PDF, "langchain/processors/pdf"
29
+ autoload :Text, "langchain/processors/text"
30
+ end
19
31
  end
20
32
 
21
33
  module Agent
@@ -28,6 +40,7 @@ module Vectorsearch
28
40
  autoload :Chroma, "vectorsearch/chroma"
29
41
  autoload :Milvus, "vectorsearch/milvus"
30
42
  autoload :Pinecone, "vectorsearch/pinecone"
43
+ autoload :Pgvector, "vectorsearch/pgvector"
31
44
  autoload :Qdrant, "vectorsearch/qdrant"
32
45
  autoload :Weaviate, "vectorsearch/weaviate"
33
46
  end
@@ -55,16 +68,3 @@ module Tool
55
68
  autoload :SerpApi, "tool/serp_api"
56
69
  autoload :Wikipedia, "tool/wikipedia"
57
70
  end
58
-
59
- module Loaders
60
- autoload :Base, "loaders/base"
61
- autoload :Docx, "loaders/docx"
62
- autoload :PDF, "loaders/pdf"
63
- autoload :Text, "loaders/text"
64
- autoload :HTML, "loaders/html"
65
- end
66
-
67
- autoload :Loader, "loader"
68
-
69
- # Load the default Loaders
70
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
@@ -0,0 +1 @@
1
+ require "langchain"
@@ -19,8 +19,6 @@ module Vectorsearch
19
19
  @llm_api_key = llm_api_key
20
20
 
21
21
  @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
-
23
- @loaders = Langchain.default_loaders
24
22
  end
25
23
 
26
24
  # Method supported by Vectorsearch DB to create a default schema
@@ -74,18 +72,12 @@ module Vectorsearch
74
72
  raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
75
73
  raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
76
74
 
77
- texts =
78
- Loader
79
- .with(*loaders)
80
- .load(path || paths)
75
+ texts = Array(path || paths)
76
+ .flatten
77
+ .map { |path| Langchain::Loader.new(path)&.load }
78
+ .compact
81
79
 
82
80
  add_texts(texts: texts)
83
81
  end
84
-
85
- attr_reader :loaders
86
-
87
- def add_loader(*loaders)
88
- loaders.each { |loader| @loaders << loader }
89
- end
90
82
  end
91
83
  end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ # The PostgreSQL vector search adapter
5
+ class Pgvector < Base
6
+ # @param url [String] The URL of the PostgreSQL database
7
+ # @param index_name [String] The name of the table to use for the index
8
+ # @param llm [String] The URL of the Language Layer API
9
+ # @param llm_api_key [String] The API key for the Language Layer API
10
+ # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ require "pg"
13
+ require "pgvector"
14
+
15
+ @client = ::PG.connect(url)
16
+ registry = ::PG::BasicTypeRegistry.new.define_default_types
17
+ ::Pgvector::PG.register_vector(registry)
18
+ @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
19
+
20
+ @index_name = index_name
21
+
22
+ super(llm: llm, llm_api_key: llm_api_key)
23
+ end
24
+
25
+ # Add a list of texts to the index
26
+ # @param texts [Array<String>] The texts to add to the index
27
+ # @return [PG::Result] The response from the database
28
+ def add_texts(texts:)
29
+ data = texts.flat_map do |text|
30
+ [text, llm_client.embed(text: text)]
31
+ end
32
+ values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
33
+ client.exec_params(
34
+ "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
35
+ data
36
+ )
37
+ end
38
+
39
+ # Create default schema
40
+ # @return [PG::Result] The response from the database
41
+ def create_default_schema
42
+ client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
43
+ client.exec(
44
+ <<~SQL
45
+ CREATE TABLE IF NOT EXISTS #{@index_name} (
46
+ id serial PRIMARY KEY,
47
+ content TEXT,
48
+ vectors VECTOR(#{default_dimension})
49
+ );
50
+ SQL
51
+ )
52
+ end
53
+
54
+ # Search for similar texts in the index
55
+ # @param query [String] The text to search for
56
+ # @param k [Integer] The number of top results to return
57
+ # @return [Array<Hash>] The results of the search
58
+ def similarity_search(query:, k: 4)
59
+ embedding = llm_client.embed(text: query)
60
+
61
+ similarity_search_by_vector(
62
+ embedding: embedding,
63
+ k: k
64
+ )
65
+ end
66
+
67
+ # Search for similar texts in the index by the passed in vector.
68
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
69
+ # @param embedding [Array<Float>] The vector to search for
70
+ # @param k [Integer] The number of top results to return
71
+ # @return [Array<Hash>] The results of the search
72
+ def similarity_search_by_vector(embedding:, k: 4)
73
+ result = client.transaction do |conn|
74
+ conn.exec("SET LOCAL ivfflat.probes = 10;")
75
+ query = <<~SQL
76
+ SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
77
+ SQL
78
+ conn.exec_params(query, [embedding, k])
79
+ end
80
+
81
+ result.to_a
82
+ end
83
+
84
+ # Ask a question and return the answer
85
+ # @param question [String] The question to ask
86
+ # @return [String] The answer to the question
87
+ def ask(question:)
88
+ search_results = similarity_search(query: question)
89
+
90
+ context = search_results.map do |result|
91
+ result["content"].to_s
92
+ end
93
+ context = context.join("\n---\n")
94
+
95
+ prompt = generate_prompt(question: question, context: context)
96
+
97
+ llm_client.chat(prompt: prompt)
98
+ end
99
+ end
100
+ end
@@ -25,20 +25,22 @@ module Vectorsearch
25
25
 
26
26
  # Add a list of texts to the index
27
27
  # @param texts [Array] The list of texts to add
28
+ # @param namespace [String] The namespace to add the texts to
29
+ # @param metadata [Hash] The metadata to use for the texts
28
30
  # @return [Hash] The response from the server
29
- def add_texts(texts:)
31
+ def add_texts(texts:, namespace: "", metadata: nil)
30
32
  vectors = texts.map do |text|
31
33
  {
32
34
  # TODO: Allows passing in your own IDs
33
35
  id: SecureRandom.uuid,
34
- metadata: {content: text},
36
+ metadata: metadata || {content: text},
35
37
  values: llm_client.embed(text: text)
36
38
  }
37
39
  end
38
40
 
39
41
  index = client.index(index_name)
40
42
 
41
- index.upsert(vectors: vectors)
43
+ index.upsert(vectors: vectors, namespace: namespace)
42
44
  end
43
45
 
44
46
  # Create the index with the default schema
@@ -54,40 +56,54 @@ module Vectorsearch
54
56
  # Search for similar texts
55
57
  # @param query [String] The text to search for
56
58
  # @param k [Integer] The number of results to return
59
+ # @param namespace [String] The namespace to search in
60
+ # @param filter [String] The filter to use
57
61
  # @return [Array] The list of results
58
62
  def similarity_search(
59
63
  query:,
60
- k: 4
64
+ k: 4,
65
+ namespace: "",
66
+ filter: nil
61
67
  )
62
68
  embedding = llm_client.embed(text: query)
63
69
 
64
70
  similarity_search_by_vector(
65
71
  embedding: embedding,
66
- k: k
72
+ k: k,
73
+ namespace: namespace,
74
+ filter: filter
67
75
  )
68
76
  end
69
77
 
70
78
  # Search for similar texts by embedding
71
79
  # @param embedding [Array] The embedding to search for
72
80
  # @param k [Integer] The number of results to return
81
+ # @param namespace [String] The namespace to search in
82
+ # @param filter [String] The filter to use
73
83
  # @return [Array] The list of results
74
- def similarity_search_by_vector(embedding:, k: 4)
84
+ def similarity_search_by_vector(embedding:, k: 4, namespace: "", filter: nil)
75
85
  index = client.index(index_name)
76
86
 
77
- response = index.query(
87
+ query_params = {
78
88
  vector: embedding,
89
+ namespace: namespace,
90
+ filter: filter,
79
91
  top_k: k,
80
92
  include_values: true,
81
93
  include_metadata: true
82
- )
94
+ }.compact
95
+
96
+ response = index.query(query_params)
83
97
  response.dig("matches")
84
98
  end
85
99
 
86
100
  # Ask a question and return the answer
87
101
  # @param question [String] The question to ask
102
+ # @param namespace [String] The namespace to search in
103
+ # @param filter [String] The filter to use
88
104
  # @return [String] The answer to the question
89
- def ask(question:)
90
- search_results = similarity_search(query: question)
105
+ def ask(question:, namespace: "", filter: nil)
106
+ search_results = similarity_search(query: question, namespace: namespace, filter: filter)
91
107
 
92
108
  context = search_results.map do |result|
93
109
  result.dig("metadata").to_s
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.11"
4
+ VERSION = "0.3.13"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.11
4
+ version: 0.3.13
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-24 00:00:00.000000000 Z
11
+ date: 2023-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -164,6 +164,34 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: '1.13'
167
+ - !ruby/object:Gem::Dependency
168
+ name: pg
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.5'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.5'
181
+ - !ruby/object:Gem::Dependency
182
+ name: pgvector
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "<"
186
+ - !ruby/object:Gem::Version
187
+ version: '0.2'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "<"
193
+ - !ruby/object:Gem::Version
194
+ version: '0.2'
167
195
  - !ruby/object:Gem::Dependency
168
196
  name: pdf-reader
169
197
  requirement: !ruby/object:Gem::Requirement
@@ -288,18 +316,22 @@ files:
288
316
  - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
289
317
  - lib/dependency_helper.rb
290
318
  - lib/langchain.rb
319
+ - lib/langchain/loader.rb
320
+ - lib/langchain/processors/base.rb
321
+ - lib/langchain/processors/csv.rb
322
+ - lib/langchain/processors/docx.rb
323
+ - lib/langchain/processors/html.rb
324
+ - lib/langchain/processors/json.rb
325
+ - lib/langchain/processors/jsonl.rb
326
+ - lib/langchain/processors/pdf.rb
327
+ - lib/langchain/processors/text.rb
328
+ - lib/langchainrb.rb
291
329
  - lib/llm/base.rb
292
330
  - lib/llm/cohere.rb
293
331
  - lib/llm/google_palm.rb
294
332
  - lib/llm/hugging_face.rb
295
333
  - lib/llm/openai.rb
296
334
  - lib/llm/replicate.rb
297
- - lib/loader.rb
298
- - lib/loaders/base.rb
299
- - lib/loaders/docx.rb
300
- - lib/loaders/html.rb
301
- - lib/loaders/pdf.rb
302
- - lib/loaders/text.rb
303
335
  - lib/prompt/base.rb
304
336
  - lib/prompt/few_shot_prompt_template.rb
305
337
  - lib/prompt/loading.rb
@@ -311,6 +343,7 @@ files:
311
343
  - lib/vectorsearch/base.rb
312
344
  - lib/vectorsearch/chroma.rb
313
345
  - lib/vectorsearch/milvus.rb
346
+ - lib/vectorsearch/pgvector.rb
314
347
  - lib/vectorsearch/pinecone.rb
315
348
  - lib/vectorsearch/qdrant.rb
316
349
  - lib/vectorsearch/weaviate.rb
data/lib/loader.rb DELETED
@@ -1,26 +0,0 @@
1
- module Loader
2
- def self.with(*loaders)
3
- LoaderSet.new(loaders)
4
- end
5
-
6
- class LoaderSet
7
- def initialize(loaders)
8
- @loaders = Array(loaders)
9
- end
10
-
11
- def load(*paths)
12
- Array(paths)
13
- .flatten
14
- .map { |path| first_loadable_loader(path)&.load }
15
- .compact
16
- end
17
-
18
- def first_loadable_loader(path)
19
- @loaders
20
- .each do |loader_klass|
21
- loader_instance = loader_klass.new(path)
22
- return(loader_instance) if loader_instance.loadable?
23
- end
24
- end
25
- end
26
- end
data/lib/loaders/base.rb DELETED
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # TODO: Add chunking options to the loaders
4
-
5
- module Loaders
6
- class Base
7
- def self.load(path)
8
- new.load(path)
9
- end
10
-
11
- def initialize(path)
12
- @path = path
13
- end
14
-
15
- def loadable?
16
- raise NotImplementedError
17
- end
18
- end
19
- end
data/lib/loaders/docx.rb DELETED
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class Docx < Base
5
- #
6
- # This Loader parses Docx files into text.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::Docx.new("path/to/my.docx").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.docx")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def initialize(path)
17
- depends_on "docx"
18
- require "docx"
19
-
20
- @path = path
21
- end
22
-
23
- # Check that the file is a `.docx` file
24
- def loadable?
25
- @path.to_s.end_with?(".docx")
26
- end
27
-
28
- def load
29
- ::Docx::Document
30
- .open(@path.to_s)
31
- .text
32
- end
33
- end
34
- end
data/lib/loaders/html.rb DELETED
@@ -1,38 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "open-uri"
4
-
5
- module Loaders
6
- class HTML < Base
7
- # We only look for headings and paragraphs
8
- TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
9
-
10
- #
11
- # This Loader parses URL into a text.
12
- # If you'd like to use it directly you can do so like this:
13
- # Loaders::URL.new("https://nokogiri.org/").load
14
- #
15
- def initialize(url)
16
- depends_on "nokogiri"
17
- require "nokogiri"
18
-
19
- @url = url
20
- end
21
-
22
- # Check that url is a valid URL
23
- def loadable?
24
- !!(@url =~ URI::DEFAULT_PARSER.make_regexp)
25
- end
26
-
27
- def load
28
- return unless response.status.first == "200"
29
-
30
- doc = Nokogiri::HTML(response.read)
31
- doc.css(TEXT_CONTENT_TAGS.join(",")).map(&:inner_text).join("\n\n")
32
- end
33
-
34
- def response
35
- @response ||= URI.parse(@url).open
36
- end
37
- end
38
- end
data/lib/loaders/pdf.rb DELETED
@@ -1,36 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class PDF < Base
5
- #
6
- # This Loader parses PDF files into text.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::PDF.new("path/to/my.pdf").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.pdf")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def initialize(path)
17
- depends_on "pdf-reader"
18
- require "pdf-reader"
19
-
20
- @path = path
21
- end
22
-
23
- # Check that the file is a PDF file
24
- def loadable?
25
- @path.to_s.end_with?(".pdf")
26
- end
27
-
28
- def load
29
- ::PDF::Reader
30
- .new(@path)
31
- .pages
32
- .map(&:text)
33
- .join("\n\n")
34
- end
35
- end
36
- end
data/lib/loaders/text.rb DELETED
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class Text < Base
5
- #
6
- # This Loader parses .txt files.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::Text.new("path/to/my.txt").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.txt")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def loadable?
17
- @path.to_s.end_with?(".txt")
18
- end
19
-
20
- def load
21
- @path.read
22
- end
23
- end
24
- end