langchainrb 0.3.10 → 0.3.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
4
- data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
3
+ metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
4
+ data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
5
5
  SHA512:
6
- metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
7
- data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
6
+ metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
7
+ data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4
data/.env.example CHANGED
@@ -3,6 +3,7 @@ COHERE_API_KEY=
3
3
  HUGGING_FACE_API_KEY=
4
4
  MILVUS_URL=
5
5
  OPENAI_API_KEY=
6
+ GOOGLE_PALM_API_KEY=
6
7
  PINECONE_API_KEY=
7
8
  PINECONE_ENVIRONMENT=
8
9
  REPLICATE_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.12] - 2023-05-25
4
+ - 🔍 Vectorsearch
5
+ - Introduce namespace support for Pinecone
6
+ - 🚚 Loaders
7
+ - Loaders overhaul
8
+
9
+ ## [0.3.11] - 2023-05-23
10
+ - 🗣️ LLMs
11
+ - Introducing support for Google PaLM (Pathways Language Model)
12
+ - Bug fixes and improvements
13
+
3
14
  ## [0.3.10] - 2023-05-19
4
15
  - 🗣️ LLMs
5
16
  - Introducing support for Replicate.com
data/Gemfile CHANGED
@@ -10,7 +10,3 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
-
14
- # TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
15
- # Most likely everything will just need to be updated to `faraday 2.x`
16
- gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
data/Gemfile.lock CHANGED
@@ -1,18 +1,7 @@
1
- GIT
2
- remote: https://github.com/andreibondarev/replicate-ruby.git
3
- revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
4
- branch: faraday-1.x
5
- specs:
6
- replicate-ruby (0.2.1)
7
- addressable
8
- faraday (>= 1.0)
9
- faraday-multipart
10
- faraday-retry
11
-
12
1
  PATH
13
2
  remote: .
14
3
  specs:
15
- langchainrb (0.3.10)
4
+ langchainrb (0.3.12)
16
5
 
17
6
  GEM
18
7
  remote: https://rubygems.org/
@@ -128,6 +117,9 @@ GEM
128
117
  faraday-retry (1.0.3)
129
118
  faraday_middleware (1.2.0)
130
119
  faraday (~> 1.0)
120
+ google_palm_api (0.1.0)
121
+ faraday (>= 1.0.0)
122
+ faraday_middleware (>= 1.0.0)
131
123
  google_search_results (2.0.1)
132
124
  graphlient (0.6.0)
133
125
  faraday (>= 1.0)
@@ -208,6 +200,11 @@ GEM
208
200
  rainbow (3.1.1)
209
201
  rake (13.0.6)
210
202
  regexp_parser (2.8.0)
203
+ replicate-ruby (0.2.2)
204
+ addressable
205
+ faraday (>= 1.0)
206
+ faraday-multipart
207
+ faraday-retry
211
208
  rexml (3.2.5)
212
209
  rspec (3.12.0)
213
210
  rspec-core (~> 3.12.0)
@@ -274,6 +271,7 @@ GEM
274
271
  zeitwerk (2.6.8)
275
272
 
276
273
  PLATFORMS
274
+ arm64-darwin-21
277
275
  arm64-darwin-22
278
276
  x86_64-darwin-19
279
277
  x86_64-darwin-22
@@ -285,16 +283,18 @@ DEPENDENCIES
285
283
  docx (~> 0.8.0)
286
284
  dotenv-rails (~> 2.7.6)
287
285
  eqn (~> 1.6.5)
286
+ google_palm_api (~> 0.1.0)
288
287
  google_search_results (~> 2.0.0)
289
288
  hugging-face (~> 0.3.4)
290
289
  langchainrb!
291
290
  milvus (~> 0.9.0)
291
+ nokogiri (~> 1.13)
292
292
  pdf-reader (~> 1.4)
293
293
  pinecone (~> 0.1.6)
294
294
  pry-byebug (~> 3.10.0)
295
295
  qdrant-ruby (~> 0.9.0)
296
296
  rake (~> 13.0)
297
- replicate-ruby!
297
+ replicate-ruby (~> 0.2.2)
298
298
  rspec (~> 3.0)
299
299
  ruby-openai (~> 4.0.0)
300
300
  standardrb
data/README.md CHANGED
@@ -136,11 +136,17 @@ cohere = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
136
  ```
137
137
 
138
138
  #### Replicate
139
- Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
139
+ Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
140
140
  ```ruby
141
141
  cohere = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
142
  ```
143
143
 
144
+ #### Google PaLM (Pathways Language Model)
145
+ Add `gem "google_palm_api", "~> 0.1.0"` to your Gemfile.
146
+ ```ruby
147
+ google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
148
+ ```
149
+
144
150
  ### Using Prompts 📋
145
151
 
146
152
  #### Prompt Templates
@@ -262,11 +268,28 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
262
268
 
263
269
  Need to read data from various sources? Load it up.
264
270
 
265
- | Name | Class | Gem Requirements |
266
- | ---- | ------------- | :--------------------------: |
267
- | docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
268
- | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
269
- | text | Loaders::Text | |
271
+ ##### Usage
272
+
273
+ Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
274
+
275
+ ```ruby
276
+ Langchain::Loader.load('/path/to/file.pdf')
277
+ ```
278
+
279
+ or
280
+
281
+ ```ruby
282
+ Langchain::Loader.load('https://www.example.com/file.pdf')
283
+ ```
284
+
285
+ ##### Supported Formats
286
+
287
+ | Format | Processor | Gem Requirements |
288
+ | ------ | ---------------- | :--------------------------: |
289
+ | docx | Processors::Docx | `gem "docx", "~> 0.8.0"` |
290
+ | html | Processors::HTML | `gem "nokogiri", "~> 1.13"` |
291
+ | pdf | Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
292
+ | text | Processors::Text | |
270
293
 
271
294
  ## Examples
272
295
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -284,7 +307,7 @@ Langchain.logger.level = :info
284
307
 
285
308
  1. `git clone https://github.com/andreibondarev/langchainrb.git`
286
309
  2. `cp .env.example .env`, then fill out the environment variables in `.env`
287
- 3. `rspec spec/` to ensure that the tests pass
310
+ 3. `bundle exec rake` to ensure that the tests pass and to run standardrb
288
311
  4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
289
312
 
290
313
  ## Core Contributors
@@ -26,8 +26,8 @@ docs = [
26
26
  ]
27
27
 
28
28
  # Add data to the index. OpenAI is used to generate the embeddings behind the scenes.
29
- chroma.add_texts(
30
- texts: docs
29
+ chroma.add_data(
30
+ paths: docs
31
31
  )
32
32
 
33
33
  # Query your data
@@ -1,12 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ VersionError = Class.new(ScriptError)
4
+
3
5
  # This method requires and loads the given gem, and then checks to see if the version of the gem meets the requirements listed in `langchain.gemspec`
4
6
  # This solution was built to avoid auto-loading every single gem in the Gemfile when the developer will most likely be using only a few of them.
5
7
  #
6
8
  # @param gem_name [String] The name of the gem to load
7
9
  # @return [Boolean] Whether or not the gem was loaded successfully
8
10
  # @raise [LoadError] If the gem is not installed
9
- # @raise [LoadError] If the gem is installed, but the version does not meet the requirements
11
+ # @raise [VersionError] If the gem is installed, but the version does not meet the requirements
10
12
  #
11
13
  def depends_on(gem_name)
12
14
  gem(gem_name) # require the gem
@@ -14,10 +16,12 @@ def depends_on(gem_name)
14
16
  return(true) unless defined?(Bundler) # If we're in a non-bundler environment, we're no longer able to determine if we'll meet requirements
15
17
 
16
18
  gem_version = Gem.loaded_specs[gem_name].version
17
- gem_requirement = Bundler.load.dependencies.find { |g| g.name == gem_name }.requirement
19
+ gem_requirement = Bundler.load.dependencies.find { |g| g.name == gem_name }&.requirement
20
+
21
+ raise LoadError unless gem_requirement
18
22
 
19
- if !gem_requirement.satisfied_by?(gem_version)
20
- raise "The #{gem_name} gem is installed, but version #{gem_requirement} is required. You have #{gem_version}."
23
+ unless gem_requirement.satisfied_by?(gem_version)
24
+ raise VersionError, "The #{gem_name} gem is installed, but version #{gem_requirement} is required. You have #{gem_version}."
21
25
  end
22
26
 
23
27
  true
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open-uri"
4
+
5
+ module Langchain
6
+ class Loader
7
+ class FileNotFound < StandardError; end
8
+
9
+ class UnknownFormatError < StandardError; end
10
+
11
+ URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
12
+
13
+ # Load data from a file or url
14
+ # Equivalent to Langchain::Loader.new(path).load
15
+ # @param path [String | Pathname] path to file or url
16
+ # @return [String] file content
17
+ def self.load(path)
18
+ new(path).load
19
+ end
20
+
21
+ # Initialize Langchain::Loader
22
+ # @param path [String | Pathname] path to file or url
23
+ # @return [Langchain::Loader] loader instance
24
+ def initialize(path)
25
+ @path = path
26
+ end
27
+
28
+ # Check if path is url
29
+ # @return [Boolean] true if path is url
30
+ def url?
31
+ return false if @path.is_a?(Pathname)
32
+
33
+ !!(@path =~ URI_REGEX)
34
+ end
35
+
36
+ # Load data from a file or url
37
+ # @return [String] file content
38
+ def load
39
+ url? ? from_url(@path) : from_path(@path)
40
+ end
41
+
42
+ private
43
+
44
+ def from_url(url)
45
+ process do
46
+ data = URI.parse(url).open
47
+ processor = find_processor(:CONTENT_TYPES, data.content_type)
48
+ [data, processor]
49
+ end
50
+ end
51
+
52
+ def from_path(path)
53
+ raise FileNotFound unless File.exist?(path)
54
+
55
+ process do
56
+ [File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
57
+ end
58
+ end
59
+
60
+ def process(&block)
61
+ data, processor = yield
62
+
63
+ raise UnknownFormatError unless processor
64
+
65
+ Langchain::Processors.const_get(processor).new.parse(data)
66
+ end
67
+
68
+ def find_processor(constant, value)
69
+ processors.find { |klass| processor_matches? "#{klass}::#{constant}", value }
70
+ end
71
+
72
+ def processor_matches?(constant, value)
73
+ Langchain::Processors.const_get(constant).include?(value)
74
+ end
75
+
76
+ def processors
77
+ Langchain::Processors.constants
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Base
6
+ EXTENSIONS = []
7
+ CONTENT_TYPES = []
8
+
9
+ def parse(data)
10
+ raise NotImplementedError
11
+ end
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Docx < Base
6
+ EXTENSIONS = [".docx"]
7
+ CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
8
+
9
+ def initialize
10
+ depends_on "docx"
11
+ require "docx"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [String]
17
+ def parse(data)
18
+ ::Docx::Document
19
+ .open(StringIO.new(data.read))
20
+ .text
21
+ end
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class HTML < Base
6
+ EXTENSIONS = [".html", ".htm"]
7
+ CONTENT_TYPES = ["text/html"]
8
+
9
+ # We only look for headings and paragraphs
10
+ TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
11
+
12
+ def initialize
13
+ depends_on "nokogiri"
14
+ require "nokogiri"
15
+ end
16
+
17
+ # Parse the document and return the text
18
+ # @param [File] data
19
+ # @return [String]
20
+ def parse(data)
21
+ Nokogiri::HTML(data.read)
22
+ .css(TEXT_CONTENT_TAGS.join(","))
23
+ .map(&:inner_text)
24
+ .join("\n\n")
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class PDF < Base
6
+ EXTENSIONS = [".pdf"]
7
+ CONTENT_TYPES = ["application/pdf"]
8
+
9
+ def initialize
10
+ depends_on "pdf-reader"
11
+ require "pdf-reader"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [String]
17
+ def parse(data)
18
+ ::PDF::Reader
19
+ .new(StringIO.new(data.read))
20
+ .pages
21
+ .map(&:text)
22
+ .join("\n\n")
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Text < Base
6
+ EXTENSIONS = [".txt"]
7
+ CONTENT_TYPES = ["text/plain"]
8
+
9
+ # Parse the document and return the text
10
+ # @param [File] data
11
+ # @return [String]
12
+ def parse(data)
13
+ data.read
14
+ end
15
+ end
16
+ end
17
+ end
data/lib/langchain.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "logger"
4
+ require "pathname"
4
5
 
5
6
  require_relative "./version"
6
7
  require_relative "./dependency_helper"
7
8
  module Langchain
8
9
  class << self
9
- attr_accessor :default_loaders
10
10
  attr_accessor :logger
11
11
 
12
12
  attr_reader :root
@@ -15,6 +15,16 @@ module Langchain
15
15
  @logger ||= ::Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
16
16
 
17
17
  @root = Pathname.new(__dir__)
18
+
19
+ autoload :Loader, "langchain/loader"
20
+
21
+ module Processors
22
+ autoload :Base, "langchain/processors/base"
23
+ autoload :PDF, "langchain/processors/pdf"
24
+ autoload :HTML, "langchain/processors/html"
25
+ autoload :Text, "langchain/processors/text"
26
+ autoload :Docx, "langchain/processors/docx"
27
+ end
18
28
  end
19
29
 
20
30
  module Agent
@@ -34,6 +44,7 @@ end
34
44
  module LLM
35
45
  autoload :Base, "llm/base"
36
46
  autoload :Cohere, "llm/cohere"
47
+ autoload :GooglePalm, "llm/google_palm"
37
48
  autoload :HuggingFace, "llm/hugging_face"
38
49
  autoload :OpenAI, "llm/openai"
39
50
  autoload :Replicate, "llm/replicate"
@@ -53,15 +64,3 @@ module Tool
53
64
  autoload :SerpApi, "tool/serp_api"
54
65
  autoload :Wikipedia, "tool/wikipedia"
55
66
  end
56
-
57
- module Loaders
58
- autoload :Base, "loaders/base"
59
- autoload :Docx, "loaders/docx"
60
- autoload :PDF, "loaders/pdf"
61
- autoload :Text, "loaders/text"
62
- end
63
-
64
- autoload :Loader, "loader"
65
-
66
- # Load the default Loaders
67
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
@@ -0,0 +1 @@
1
+ require "langchain"
data/lib/llm/base.rb CHANGED
@@ -7,9 +7,10 @@ module LLM
7
7
  # Currently supported LLMs
8
8
  # TODO: Add support for HuggingFace and other LLMs
9
9
  LLMS = {
10
- openai: "OpenAI",
11
10
  cohere: "Cohere",
11
+ google_palm: "GooglePalm",
12
12
  huggingface: "HuggingFace",
13
+ openai: "OpenAI",
13
14
  replicate: "Replicate"
14
15
  }.freeze
15
16
 
@@ -0,0 +1,85 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM
4
+ class GooglePalm < Base
5
+ # Wrapper around the Google PaLM (Pathways Language Model) APIs.
6
+
7
+ DEFAULTS = {
8
+ temperature: 0.0,
9
+ dimension: 768 # This is what the `embedding-gecko-001` model generates
10
+ }.freeze
11
+
12
+ def initialize(api_key:)
13
+ depends_on "google_palm_api"
14
+ require "google_palm_api"
15
+
16
+ @client = ::GooglePalmApi::Client.new(api_key: api_key)
17
+ end
18
+
19
+ #
20
+ # Generate an embedding for a given text
21
+ #
22
+ # @param text [String] The text to generate an embedding for
23
+ # @return [Array] The embedding
24
+ #
25
+ def embed(text:)
26
+ response = client.embed(
27
+ text: text
28
+ )
29
+ response.dig("embedding", "value")
30
+ end
31
+
32
+ #
33
+ # Generate a completion for a given prompt
34
+ #
35
+ # @param prompt [String] The prompt to generate a completion for
36
+ # @return [String] The completion
37
+ #
38
+ def complete(prompt:, **params)
39
+ default_params = {
40
+ prompt: prompt,
41
+ temperature: DEFAULTS[:temperature]
42
+ }
43
+
44
+ if params[:stop_sequences]
45
+ default_params[:stop_sequences] = params.delete(:stop_sequences)
46
+ end
47
+
48
+ if params[:max_tokens]
49
+ default_params[:max_output_tokens] = params.delete(:max_tokens)
50
+ end
51
+
52
+ default_params.merge!(params)
53
+
54
+ response = client.generate_text(**default_params)
55
+ response.dig("candidates", 0, "output")
56
+ end
57
+
58
+ #
59
+ # Generate a chat completion for a given prompt
60
+ #
61
+ # @param prompt [String] The prompt to generate a chat completion for
62
+ # @return [String] The chat completion
63
+ #
64
+ def chat(prompt:, **params)
65
+ # TODO: Figure out how to introduce persisted conversations
66
+ default_params = {
67
+ prompt: prompt,
68
+ temperature: DEFAULTS[:temperature]
69
+ }
70
+
71
+ if params[:stop_sequences]
72
+ default_params[:stop] = params.delete(:stop_sequences)
73
+ end
74
+
75
+ if params[:max_tokens]
76
+ default_params[:max_output_tokens] = params.delete(:max_tokens)
77
+ end
78
+
79
+ default_params.merge!(params)
80
+
81
+ response = client.generate_chat_message(**default_params)
82
+ response.dig("candidates", 0, "content")
83
+ end
84
+ end
85
+ end
data/lib/prompt/base.rb CHANGED
@@ -66,6 +66,8 @@ module Prompt
66
66
  # contained within the template. Input variables are defined as text enclosed in
67
67
  # curly braces (e.g. "{variable_name}").
68
68
  #
69
+ # Content within two consecutive curly braces (e.g. "{{ignore_me}}") is ignored.
70
+ #
69
71
  # @param template [String] The template string to extract variables from.
70
72
  #
71
73
  # @return [Array<String>] An array of input variable names.
@@ -74,9 +76,9 @@ module Prompt
74
76
  input_variables = []
75
77
  scanner = StringScanner.new(template)
76
78
 
77
- while scanner.scan_until(/\{([^{}]*)\}/)
79
+ while scanner.scan_until(/\{([^}]*)\}/)
78
80
  variable = scanner[1].strip
79
- input_variables << variable unless variable.empty?
81
+ input_variables << variable unless variable.empty? || variable[0] == "{"
80
82
  end
81
83
 
82
84
  input_variables
@@ -20,7 +20,7 @@ module Prompt
20
20
  end
21
21
 
22
22
  #
23
- # Format the prompt with the inputs.
23
+ # Format the prompt with the inputs. Double curly braces ({{ }}) are replaced with single ones ({ }) to adhere to the f-string spec.
24
24
  #
25
25
  # @param kwargs [Hash] Any arguments to be passed to the prompt template.
26
26
  # @return [String] A formatted string.
@@ -28,7 +28,7 @@ module Prompt
28
28
  def format(**kwargs)
29
29
  result = @template
30
30
  kwargs.each { |key, value| result = result.gsub(/\{#{key}\}/, value.to_s) }
31
- result
31
+ result.gsub(/{{/, "{").gsub(/}}/, "}")
32
32
  end
33
33
 
34
34
  #
@@ -19,8 +19,6 @@ module Vectorsearch
19
19
  @llm_api_key = llm_api_key
20
20
 
21
21
  @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
-
23
- @loaders = Langchain.default_loaders
24
22
  end
25
23
 
26
24
  # Method supported by Vectorsearch DB to create a default schema
@@ -74,18 +72,12 @@ module Vectorsearch
74
72
  raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
75
73
  raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
76
74
 
77
- texts =
78
- Loader
79
- .with(*loaders)
80
- .load(path || paths)
75
+ texts = Array(path || paths)
76
+ .flatten
77
+ .map { |path| Langchain::Loader.new(path)&.load }
78
+ .compact
81
79
 
82
80
  add_texts(texts: texts)
83
81
  end
84
-
85
- attr_reader :loaders
86
-
87
- def add_loader(*loaders)
88
- loaders.each { |loader| @loaders << loader }
89
- end
90
82
  end
91
83
  end
@@ -25,20 +25,22 @@ module Vectorsearch
25
25
 
26
26
  # Add a list of texts to the index
27
27
  # @param texts [Array] The list of texts to add
28
+ # @param namespace [String] The namespace to add the texts to
29
+ # @param metadata [Hash] The metadata to use for the texts
28
30
  # @return [Hash] The response from the server
29
- def add_texts(texts:)
31
+ def add_texts(texts:, namespace: "", metadata: nil)
30
32
  vectors = texts.map do |text|
31
33
  {
32
34
  # TODO: Allow passing in your own IDs
33
35
  id: SecureRandom.uuid,
34
- metadata: {content: text},
36
+ metadata: metadata || {content: text},
35
37
  values: llm_client.embed(text: text)
36
38
  }
37
39
  end
38
40
 
39
41
  index = client.index(index_name)
40
42
 
41
- index.upsert(vectors: vectors)
43
+ index.upsert(vectors: vectors, namespace: namespace)
42
44
  end
43
45
 
44
46
  # Create the index with the default schema
@@ -54,40 +56,54 @@ module Vectorsearch
54
56
  # Search for similar texts
55
57
  # @param query [String] The text to search for
56
58
  # @param k [Integer] The number of results to return
59
+ # @param namespace [String] The namespace to search in
60
+ # @param filter [String] The filter to use
57
61
  # @return [Array] The list of results
58
62
  def similarity_search(
59
63
  query:,
60
- k: 4
64
+ k: 4,
65
+ namespace: "",
66
+ filter: nil
61
67
  )
62
68
  embedding = llm_client.embed(text: query)
63
69
 
64
70
  similarity_search_by_vector(
65
71
  embedding: embedding,
66
- k: k
72
+ k: k,
73
+ namespace: namespace,
74
+ filter: filter
67
75
  )
68
76
  end
69
77
 
70
78
  # Search for similar texts by embedding
71
79
  # @param embedding [Array] The embedding to search for
72
80
  # @param k [Integer] The number of results to return
81
+ # @param namespace [String] The namespace to search in
82
+ # @param filter [String] The filter to use
73
83
  # @return [Array] The list of results
74
- def similarity_search_by_vector(embedding:, k: 4)
84
+ def similarity_search_by_vector(embedding:, k: 4, namespace: "", filter: nil)
75
85
  index = client.index(index_name)
76
86
 
77
- response = index.query(
87
+ query_params = {
78
88
  vector: embedding,
89
+ namespace: namespace,
90
+ filter: filter,
79
91
  top_k: k,
80
92
  include_values: true,
81
93
  include_metadata: true
82
- )
94
+ }.compact
95
+
96
+ response = index.query(query_params)
83
97
  response.dig("matches")
84
98
  end
85
99
 
86
100
  # Ask a question and return the answer
87
101
  # @param question [String] The question to ask
102
+ # @param namespace [String] The namespace to search in
103
+ # @param filter [String] The filter to use
88
104
  # @return [String] The answer to the question
89
- def ask(question:)
90
- search_results = similarity_search(query: question)
105
+ def ask(question:, namespace: "", filter: nil)
106
+ search_results = similarity_search(query: question, namespace: namespace, filter: filter)
91
107
 
92
108
  context = search_results.map do |result|
93
109
  result.dig("metadata").to_s
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.10"
4
+ VERSION = "0.3.12"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.10
4
+ version: 0.3.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-21 00:00:00.000000000 Z
11
+ date: 2023-05-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -94,6 +94,20 @@ dependencies:
94
94
  - - "~>"
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.6.5
97
+ - !ruby/object:Gem::Dependency
98
+ name: google_palm_api
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.1.0
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.1.0
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: google_search_results
99
113
  requirement: !ruby/object:Gem::Requirement
@@ -136,6 +150,20 @@ dependencies:
136
150
  - - "~>"
137
151
  - !ruby/object:Gem::Version
138
152
  version: 0.9.0
153
+ - !ruby/object:Gem::Dependency
154
+ name: nokogiri
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.13'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.13'
139
167
  - !ruby/object:Gem::Dependency
140
168
  name: pdf-reader
141
169
  requirement: !ruby/object:Gem::Requirement
@@ -168,16 +196,16 @@ dependencies:
168
196
  name: replicate-ruby
169
197
  requirement: !ruby/object:Gem::Requirement
170
198
  requirements:
171
- - - ">="
199
+ - - "~>"
172
200
  - !ruby/object:Gem::Version
173
- version: '0'
201
+ version: 0.2.2
174
202
  type: :development
175
203
  prerelease: false
176
204
  version_requirements: !ruby/object:Gem::Requirement
177
205
  requirements:
178
- - - ">="
206
+ - - "~>"
179
207
  - !ruby/object:Gem::Version
180
- version: '0'
208
+ version: 0.2.2
181
209
  - !ruby/object:Gem::Dependency
182
210
  name: qdrant-ruby
183
211
  requirement: !ruby/object:Gem::Requirement
@@ -260,16 +288,19 @@ files:
260
288
  - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
261
289
  - lib/dependency_helper.rb
262
290
  - lib/langchain.rb
291
+ - lib/langchain/loader.rb
292
+ - lib/langchain/processors/base.rb
293
+ - lib/langchain/processors/docx.rb
294
+ - lib/langchain/processors/html.rb
295
+ - lib/langchain/processors/pdf.rb
296
+ - lib/langchain/processors/text.rb
297
+ - lib/langchainrb.rb
263
298
  - lib/llm/base.rb
264
299
  - lib/llm/cohere.rb
300
+ - lib/llm/google_palm.rb
265
301
  - lib/llm/hugging_face.rb
266
302
  - lib/llm/openai.rb
267
303
  - lib/llm/replicate.rb
268
- - lib/loader.rb
269
- - lib/loaders/base.rb
270
- - lib/loaders/docx.rb
271
- - lib/loaders/pdf.rb
272
- - lib/loaders/text.rb
273
304
  - lib/prompt/base.rb
274
305
  - lib/prompt/few_shot_prompt_template.rb
275
306
  - lib/prompt/loading.rb
data/lib/loader.rb DELETED
@@ -1,26 +0,0 @@
1
- module Loader
2
- def self.with(*loaders)
3
- LoaderSet.new(loaders)
4
- end
5
-
6
- class LoaderSet
7
- def initialize(loaders)
8
- @loaders = Array(loaders)
9
- end
10
-
11
- def load(*paths)
12
- Array(paths)
13
- .flatten
14
- .map { |path| first_loadable_loader(path)&.load }
15
- .compact
16
- end
17
-
18
- def first_loadable_loader(path)
19
- @loaders
20
- .each do |loader_klass|
21
- loader_instance = loader_klass.new(path)
22
- return(loader_instance) if loader_instance.loadable?
23
- end
24
- end
25
- end
26
- end
data/lib/loaders/base.rb DELETED
@@ -1,19 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # TODO: Add chunking options to the loaders
4
-
5
- module Loaders
6
- class Base
7
- def self.load(path)
8
- new.load(path)
9
- end
10
-
11
- def initialize(path)
12
- @path = path
13
- end
14
-
15
- def loadable?
16
- raise NotImplementedError
17
- end
18
- end
19
- end
data/lib/loaders/docx.rb DELETED
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class Docx < Base
5
- #
6
- # This Loader parses Docx files into text.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::Docx.new("path/to/my.docx").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.docx")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def initialize(path)
17
- depends_on "docx"
18
- require "docx"
19
-
20
- @path = path
21
- end
22
-
23
- # Check that the file is a `.docx` file
24
- def loadable?
25
- @path.to_s.end_with?(".docx")
26
- end
27
-
28
- def load
29
- ::Docx::Document
30
- .open(@path.to_s)
31
- .text
32
- end
33
- end
34
- end
data/lib/loaders/pdf.rb DELETED
@@ -1,36 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class PDF < Base
5
- #
6
- # This Loader parses PDF files into text.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::PDF.new("path/to/my.pdf").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.pdf")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def initialize(path)
17
- depends_on "pdf-reader"
18
- require "pdf-reader"
19
-
20
- @path = path
21
- end
22
-
23
- # Check that the file is a PDF file
24
- def loadable?
25
- @path.to_s.end_with?(".pdf")
26
- end
27
-
28
- def load
29
- ::PDF::Reader
30
- .new(@path)
31
- .pages
32
- .map(&:text)
33
- .join("\n\n")
34
- end
35
- end
36
- end
data/lib/loaders/text.rb DELETED
@@ -1,24 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Loaders
4
- class Text < Base
5
- #
6
- # This Loader parses .txt files.
7
- # If you'd like to use it directly you can do so like this:
8
- # Loaders::Text.new("path/to/my.txt").load
9
- #
10
- # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
- # qdrant = Vectorsearch::Qdrant.new(...)
12
- # path = Langchain.root.join("path/to/my.txt")
13
- # qdrant.add_data(path: path)
14
- #
15
-
16
- def loadable?
17
- @path.to_s.end_with?(".txt")
18
- end
19
-
20
- def load
21
- @path.read
22
- end
23
- end
24
- end