langchainrb 0.3.6 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 576c331bc4372bc1934a6a75fc496469242d0b645b56ffb9163cd9f812007414
4
- data.tar.gz: 82a9ea6c734cab1490fbe8060732c41c4aba6e3d06d6895ea40e9f2f5db2f6ac
3
+ metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
+ data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
5
5
  SHA512:
6
- metadata.gz: ce0325a59c2257e35c0be5e3e78ad44d046a058070585ba8127b97338c4280d4897f0dd070b16b9bea9cc0f5f7cd7f6b330611bb49f6180ecb0dafc55bb77d16
7
- data.tar.gz: f38012ae7d0da8c70d76f37f2a24431e709fe0841e62812f1471a1b9a2b33763235c4eb246bb6f20d61e50e903c137156abb851a25b4d46e4b7fd85333f2dfe2
6
+ metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
+ data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
data/.env.example ADDED
@@ -0,0 +1,12 @@
1
+ CHROMA_URL=
2
+ COHERE_API_KEY=
3
+ HUGGING_FACE_API_KEY=
4
+ MILVUS_URL=
5
+ OPENAI_API_KEY=
6
+ PINECONE_API_KEY=
7
+ PINECONE_ENVIRONMENT=
8
+ QDRANT_API_KEY=
9
+ QDRANT_URL=
10
+ SERPAPI_API_KEY=
11
+ WEAVIATE_API_KEY=
12
+ WEAVIATE_URL=
data/CHANGELOG.md CHANGED
@@ -1,14 +1,26 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.8] - 2023-05-19
4
+ - 🔍 Vectorsearch
5
+ - Introduce support for Chroma DB
6
+
7
+ - 🚚 Loaders
8
+ - Bug fix `Loaders::Text` to only parse .txt files
9
+
10
+ ## [0.3.7] - 2023-05-19
11
+ - 🚚 Loaders
12
+ - Introduce `Loaders::Text` to parse .txt files
13
+ - Introduce `Loaders::PDF` to parse .pdf files
14
+
3
15
  ## [0.3.6] - 2023-05-17
4
- - LLMs
16
+ - 🗣️ LLMs
5
17
  - Bump `hugging-face` gem version
6
18
 
7
19
  ## [0.3.5] - 2023-05-16
8
20
  - Bug fixes
9
21
 
10
22
  ## [0.3.4] - 2023-05-16
11
- - LLMs
23
+ - 🗣️ LLMs
12
24
  - Introducing support for HuggingFace
13
25
 
14
26
  ## [0.3.3] - 2023-05-16
@@ -17,32 +29,28 @@
17
29
  - Use the Ruby logger
18
30
 
19
31
  ## [0.3.2] - 2023-05-15
20
- - Agents
32
+ - 🤖 Agents
21
33
  - Fix Chain of Thought prompt loader
22
34
 
23
35
  ## [0.3.1] - 2023-05-12
24
- - Tools
36
+ - 🛠️ Tools
25
37
  - Introducing `Tool::Wikipedia`, a tool that looks up Wikipedia entries
26
38
 
27
39
  ## [0.3.0] - 2023-05-12
28
-
29
- - Agents
40
+ - 🤖 Agents
30
41
  - Introducing `Agent::ChainOfThoughtAgent`, a semi-autonomous bot that uses Tools to retrieve additional information in order to make best-effort informed replies to user's questions.
31
- - Tools
42
+ - 🛠️ Tools
32
43
  - Introducing `Tool::Calculator` tool that solves mathematical expressions.
33
44
  - Introducing `Tool::Search` tool that executes Google Searches.
34
45
 
35
46
  ## [0.2.0] - 2023-05-09
36
-
37
- - Prompt Templating
47
+ - 📋 Prompt Templating
38
48
  - Ability to create prompt templates and save them to JSON files
39
49
  - Default `Prompt::FewShotPromptTemplate`
40
50
  - New examples added to `examples/`
41
51
 
42
52
  ## [0.1.4] - 2023-05-02
43
-
44
53
  - Backfilling missing specs
45
54
 
46
55
  ## [0.1.3] - 2023-05-01
47
-
48
56
  - Initial release
data/Gemfile.lock CHANGED
@@ -1,11 +1,12 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.6)
4
+ langchainrb (0.3.8)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
8
8
  specs:
9
+ Ascii85 (1.0.3)
9
10
  actionpack (7.0.4.3)
10
11
  actionview (= 7.0.4.3)
11
12
  activesupport (= 7.0.4.3)
@@ -26,9 +27,13 @@ GEM
26
27
  tzinfo (~> 2.0)
27
28
  addressable (2.8.4)
28
29
  public_suffix (>= 2.0.2, < 6.0)
30
+ afm (0.2.2)
29
31
  ast (2.4.2)
30
32
  builder (3.2.4)
31
33
  byebug (11.1.3)
34
+ chroma-db (0.3.0)
35
+ dry-monads (~> 1.6)
36
+ ruby-next-core (>= 0.15.0)
32
37
  coderay (1.1.3)
33
38
  cohere-ruby (0.9.3)
34
39
  faraday (~> 1)
@@ -52,6 +57,10 @@ GEM
52
57
  concurrent-ruby (~> 1.0)
53
58
  dry-core (~> 1.0, < 2)
54
59
  zeitwerk (~> 2.6)
60
+ dry-monads (1.6.0)
61
+ concurrent-ruby (~> 1.0)
62
+ dry-core (~> 1.0, < 2)
63
+ zeitwerk (~> 2.6)
55
64
  dry-schema (1.13.1)
56
65
  concurrent-ruby (~> 1.0)
57
66
  dry-configurable (~> 1.0, >= 1.0.1)
@@ -114,6 +123,7 @@ GEM
114
123
  graphql-client (0.18.0)
115
124
  activesupport (>= 3.0)
116
125
  graphql
126
+ hashery (2.1.2)
117
127
  httparty (0.21.0)
118
128
  mini_mime (>= 1.0.0)
119
129
  multi_xml (>= 0.5.2)
@@ -144,6 +154,12 @@ GEM
144
154
  parallel (1.23.0)
145
155
  parser (3.2.2.1)
146
156
  ast (~> 2.4.1)
157
+ pdf-reader (1.4.1)
158
+ Ascii85 (~> 1.0.0)
159
+ afm (~> 0.2.1)
160
+ hashery (~> 2.0)
161
+ ruby-rc4
162
+ ttfunk
147
163
  pinecone (0.1.71)
148
164
  dry-struct (~> 1.6.0)
149
165
  dry-validation (~> 1.10.0)
@@ -207,10 +223,12 @@ GEM
207
223
  rubocop-performance (1.16.0)
208
224
  rubocop (>= 1.7.0, < 2.0)
209
225
  rubocop-ast (>= 0.4.0)
226
+ ruby-next-core (0.15.3)
210
227
  ruby-openai (4.0.0)
211
228
  faraday (>= 1)
212
229
  faraday-multipart (>= 1)
213
230
  ruby-progressbar (1.13.0)
231
+ ruby-rc4 (0.1.5)
214
232
  ruby2_keywords (0.0.5)
215
233
  standard (1.28.2)
216
234
  language_server-protocol (~> 3.17.0.2)
@@ -228,6 +246,7 @@ GEM
228
246
  thor (1.2.1)
229
247
  treetop (1.6.12)
230
248
  polyglot (~> 0.3)
249
+ ttfunk (1.7.0)
231
250
  tzinfo (2.0.6)
232
251
  concurrent-ruby (~> 1.0)
233
252
  unicode-display_width (2.4.2)
@@ -242,9 +261,11 @@ GEM
242
261
  PLATFORMS
243
262
  arm64-darwin-22
244
263
  x86_64-darwin-19
264
+ x86_64-darwin-22
245
265
  x86_64-linux
246
266
 
247
267
  DEPENDENCIES
268
+ chroma-db (~> 0.3.0)
248
269
  cohere-ruby (~> 0.9.3)
249
270
  dotenv-rails (~> 2.7.6)
250
271
  eqn (~> 1.6.5)
@@ -252,6 +273,7 @@ DEPENDENCIES
252
273
  hugging-face (~> 0.3.3)
253
274
  langchainrb!
254
275
  milvus (~> 0.9.0)
276
+ pdf-reader (~> 1.4)
255
277
  pinecone (~> 0.1.6)
256
278
  pry-byebug (~> 3.10.0)
257
279
  qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -30,10 +30,11 @@ require "langchain"
30
30
 
31
31
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
32
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
34
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
35
- | Milvus | :white_check_mark: | WIP | WIP | WIP | | |
36
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
33
+ | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
+ | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
+ | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
+ | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
+ | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
38
 
38
39
  ### Using Vector Search Databases 🔍
39
40
 
@@ -54,6 +55,7 @@ client = Vectorsearch::Weaviate.new(
54
55
  client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
55
56
  client = Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
56
57
  client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
+ client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
57
59
  ```
58
60
 
59
61
  ```ruby
@@ -62,7 +64,7 @@ client.create_default_schema
62
64
  ```
63
65
 
64
66
  ```ruby
65
- # Store your documents in your vector search database
67
+ # Store plain texts in your vector search database
66
68
  client.add_texts(
67
69
  texts: [
68
70
  "Begin by preheating your oven to 375°F (190°C). Prepare four boneless, skinless chicken breasts by cutting a pocket into the side of each breast, being careful not to cut all the way through. Season the chicken with salt and pepper to taste. In a large skillet, melt 2 tablespoons of unsalted butter over medium heat. Add 1 small diced onion and 2 minced garlic cloves, and cook until softened, about 3-4 minutes. Add 8 ounces of fresh spinach and cook until wilted, about 3 minutes. Remove the skillet from heat and let the mixture cool slightly.",
@@ -70,7 +72,13 @@ client.add_texts(
70
72
  ]
71
73
  )
72
74
  ```
75
+ ```ruby
76
+ # Store the contents of your files in your vector search database
77
+ my_pdf = Langchain.root.join("path/to/my.pdf")
78
+ my_text = Langchain.root.join("path/to/my.txt")
73
79
 
80
+ client.add_data(paths: [my_pdf, my_text])
81
+ ```
74
82
  ```ruby
75
83
  # Retrieve similar documents based on the query string passed in
76
84
  client.similarity_search(
@@ -233,12 +241,24 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
233
241
 
234
242
  #### Available Tools 🛠️
235
243
 
236
- | Name | Description | Requirements |
237
- | -------- | :------------------: | :------------------: |
238
- | "calculator" | Useful for getting the result of a math expression | |
239
- | "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key)
240
- | "wikipedia" | Calls Wikipedia API to retrieve the summary | |
244
+ | Name | Description | ENV Requirements | Gem Requirements |
245
+ | ------------ | :------------------------------------------------: | :-----------------------------------------------------------: | :---------------------------------------: |
246
+ | "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
247
+ | "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` | |
248
+ | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
249
+
250
+
251
+ #### Loaders 🚚
241
252
 
253
+ Need to read data from various sources? Load it up.
254
+
255
+ | Name | Class | Gem Requirements |
256
+ | ---- | ------------- | :--------------------------: |
257
+ | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
258
+ | text | Loaders::Text | |
259
+
260
+ ## Examples
261
+ Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
242
262
 
243
263
  ## Logging
244
264
 
@@ -251,9 +271,10 @@ Langchain.logger.level = :info
251
271
 
252
272
  ## Development
253
273
 
254
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
255
-
256
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
274
+ 1. `git clone https://github.com/andreibondarev/langchainrb.git`
275
+ 2. `cp .env.example .env`, then fill out the environment variables in `.env`
276
+ 3. `rspec spec/` to ensure that the tests pass
277
+ 4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
257
278
 
258
279
  ## Core Contributors
259
280
  [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
@@ -261,8 +282,9 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
261
282
  ## Honorary Contributors
262
283
  [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
263
284
  [<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
285
+ [<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
264
286
 
265
- (Criteria of becoming an Honorary Contributor or Core Contributor is pending...)
287
+ (Criteria for becoming an Honorary Contributor or Core Contributor is pending...)
266
288
 
267
289
  ## Contributing
268
290
 
@@ -0,0 +1,36 @@
1
+ require "langchain"
2
+
3
+ # gem install chroma-db
4
+ # or add `gem "chroma-db", "~> 0.3.0"` to your Gemfile
5
+
6
+ # Instantiate the Chroma client
7
+ chroma = Vectorsearch::Chroma.new(
8
+ url: ENV["CHROMA_URL"],
9
+ index_name: "documents",
10
+ llm: :openai,
11
+ llm_api_key: ENV["OPENAI_API_KEY"]
12
+ )
13
+
14
+ # Create the default schema.
15
+ chroma.create_default_schema
16
+
17
+ # Set up an array of PDF and TXT documents
18
+ docs = [
19
+ Langchain.root.join("/docs/document.pdf"),
20
+ Langchain.root.join("/docs/document.txt")
21
+ ]
22
+
23
+ # Add data to the index. Chroma will use OpenAI to generate embeddings behind the scenes.
24
+ chroma.add_texts(
25
+ texts: docs
26
+ )
27
+
28
+ # Query your data
29
+ chroma.similarity_search(
30
+ query: "..."
31
+ )
32
+
33
+ # Interact with your index through Q&A
34
+ chroma.ask(
35
+ question: "..."
36
+ )
@@ -43,7 +43,7 @@ module Agent
43
43
 
44
44
  loop do
45
45
  Langchain.logger.info("Agent: Passing the prompt to the #{llm} LLM")
46
- response = llm_client.generate_completion(
46
+ response = llm_client.complete(
47
47
  prompt: prompt,
48
48
  stop_sequences: ["Observation:"],
49
49
  max_tokens: 500
@@ -100,7 +100,7 @@ module Agent
100
100
  # @return [PromptTemplate] PromptTemplate instance
101
101
  def prompt_template
102
102
  @template ||= Prompt.load_from_path(
103
- file_path: Pathname.new(__dir__).join("chain_of_thought_agent_prompt.json")
103
+ file_path: Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
104
104
  )
105
105
  end
106
106
  end
@@ -1,5 +1,13 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # This method requires and loads the given gem, and then checks to see if the version of the gem meets the requirements listed in `langchain.gemspec`
4
+ # This solution was built to avoid auto-loading every single gem in the Gemfile when the developer will most likely be using only a few of them.
5
+ #
6
+ # @param gem_name [String] The name of the gem to load
7
+ # @return [Boolean] Whether or not the gem was loaded successfully
8
+ # @raise [LoadError] If the gem is not installed
9
+ # @raise [LoadError] If the gem is installed, but the version does not meet the requirements
10
+ #
3
11
  def depends_on(gem_name)
4
12
  gem(gem_name) # require the gem
5
13
 
data/lib/langchain.rb CHANGED
@@ -1,8 +1,21 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "logger"
4
+
3
5
  require_relative "./version"
4
6
  require_relative "./dependency_helper"
5
- require_relative "./logging"
7
+ module Langchain
8
+ class << self
9
+ attr_accessor :default_loaders
10
+ attr_accessor :logger
11
+
12
+ attr_reader :root
13
+ end
14
+
15
+ @logger ||= ::Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
16
+
17
+ @root = Pathname.new(__dir__)
18
+ end
6
19
 
7
20
  module Agent
8
21
  autoload :Base, "agent/base"
@@ -11,6 +24,7 @@ end
11
24
 
12
25
  module Vectorsearch
13
26
  autoload :Base, "vectorsearch/base"
27
+ autoload :Chroma, "vectorsearch/chroma"
14
28
  autoload :Milvus, "vectorsearch/milvus"
15
29
  autoload :Pinecone, "vectorsearch/pinecone"
16
30
  autoload :Qdrant, "vectorsearch/qdrant"
@@ -38,3 +52,14 @@ module Tool
38
52
  autoload :SerpApi, "tool/serp_api"
39
53
  autoload :Wikipedia, "tool/wikipedia"
40
54
  end
55
+
56
+ module Loaders
57
+ autoload :Base, "loaders/base"
58
+ autoload :PDF, "loaders/pdf"
59
+ autoload :Text, "loaders/text"
60
+ end
61
+
62
+ autoload :Loader, "loader"
63
+
64
+ # Load the default Loaders
65
+ Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
data/lib/llm/base.rb CHANGED
@@ -16,6 +16,21 @@ module LLM
16
16
  self.class.const_get(:DEFAULTS).dig(:dimension)
17
17
  end
18
18
 
19
+ # Method supported by an LLM that generates a response for a given chat-style prompt
20
+ def chat(...)
21
+ raise NotImplementedError, "#{self.class.name} does not support chat"
22
+ end
23
+
24
+ # Method supported by an LLM that completes a given prompt
25
+ def complete(...)
26
+ raise NotImplementedError, "#{self.class.name} does not support completion"
27
+ end
28
+
29
+ # Method supported by an LLM that generates an embedding for a given text or array of texts
30
+ def embed(...)
31
+ raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
32
+ end
33
+
19
34
  # Ensure that the LLM value passed in is supported
20
35
  # @param llm [Symbol] The LLM to use
21
36
  def self.validate_llm!(llm:)
data/lib/llm/cohere.rb CHANGED
@@ -47,7 +47,9 @@ module LLM
47
47
  response.dig("generations").first.dig("text")
48
48
  end
49
49
 
50
- alias_method :generate_completion, :complete
51
- alias_method :generate_embedding, :embed
50
+ # Cohere does not have a dedicated chat endpoint, so instead we call `complete()`
51
+ def chat(...)
52
+ complete(...)
53
+ end
52
54
  end
53
55
  end
@@ -25,7 +25,7 @@ module LLM
25
25
  # @param text [String] The text to embed
26
26
  # @return [Array] The embedding
27
27
  def embed(text:)
28
- response = client.embedding(
28
+ client.embedding(
29
29
  input: text,
30
30
  model: DEFAULTS[:embeddings_model_name]
31
31
  )
data/lib/llm/openai.rb CHANGED
@@ -5,6 +5,7 @@ module LLM
5
5
  DEFAULTS = {
6
6
  temperature: 0.0,
7
7
  completion_model_name: "text-davinci-003",
8
+ chat_completion_model_name: "gpt-3.5-turbo",
8
9
  embeddings_model_name: "text-embedding-ada-002",
9
10
  dimension: 1536
10
11
  }.freeze
@@ -50,7 +51,25 @@ module LLM
50
51
  response.dig("choices", 0, "text")
51
52
  end
52
53
 
53
- alias_method :generate_completion, :complete
54
- alias_method :generate_embedding, :embed
54
+ # Generate a chat completion for a given prompt
55
+ # @param prompt [String] The prompt to generate a chat completion for
56
+ # @return [String] The chat completion
57
+ def chat(prompt:, **params)
58
+ default_params = {
59
+ model: DEFAULTS[:chat_completion_model_name],
60
+ temperature: DEFAULTS[:temperature],
61
+ # TODO: Figure out how to introduce persisted conversations
62
+ messages: [{role: "user", content: prompt}]
63
+ }
64
+
65
+ if params[:stop_sequences]
66
+ default_params[:stop] = params.delete(:stop_sequences)
67
+ end
68
+
69
+ default_params.merge!(params)
70
+
71
+ response = client.chat(parameters: default_params)
72
+ response.dig("choices", 0, "message", "content")
73
+ end
55
74
  end
56
75
  end
data/lib/loader.rb ADDED
@@ -0,0 +1,26 @@
1
+ module Loader
2
+ def self.with(*loaders)
3
+ LoaderSet.new(loaders)
4
+ end
5
+
6
+ class LoaderSet
7
+ def initialize(loaders)
8
+ @loaders = Array(loaders)
9
+ end
10
+
11
+ def load(*paths)
12
+ Array(paths)
13
+ .flatten
14
+ .map { |path| first_loadable_loader(path)&.load }
15
+ .compact
16
+ end
17
+
18
+ def first_loadable_loader(path)
19
+ @loaders
20
+ .each do |loader_klass|
21
+ loader_instance = loader_klass.new(path)
22
+ return(loader_instance) if loader_instance.loadable?
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loaders
4
+ class Base
5
+ def self.load(path)
6
+ new.load(path)
7
+ end
8
+
9
+ def initialize(path)
10
+ @path = path
11
+ end
12
+
13
+ def loadable?
14
+ raise NotImplementedError
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,34 @@
1
+ module Loaders
2
+ class PDF < Base
3
+ #
4
+ # This Loader parses PDF files into text.
5
+ # If you'd like to use it directly you can do so like this:
6
+ # Loaders::PDF.new("path/to/my.pdf").load
7
+ #
8
+ # This parser is also invoked when you're adding data to a Vectorsearch DB:
9
+ # qdrant = Vectorsearch::Qdrant.new(...)
10
+ # path = Langchain.root.join("path/to/my.pdf")
11
+ # qdrant.add_data(path: path)
12
+ #
13
+
14
+ def initialize(path)
15
+ depends_on "pdf-reader"
16
+ require "pdf-reader"
17
+
18
+ @path = path
19
+ end
20
+
21
+ # Check that the file is a PDF file
22
+ def loadable?
23
+ @path.to_s.end_with?(".pdf")
24
+ end
25
+
26
+ def load
27
+ ::PDF::Reader
28
+ .new(@path)
29
+ .pages
30
+ .map(&:text)
31
+ .join("\n\n")
32
+ end
33
+ end
34
+ end
@@ -0,0 +1,22 @@
1
+ module Loaders
2
+ class Text < Base
3
+ #
4
+ # This Loader parses .txt files.
5
+ # If you'd like to use it directly you can do so like this:
6
+ # Loaders::Text.new("path/to/my.txt").load
7
+ #
8
+ # This parser is also invoked when you're adding data to a Vectorsearch DB:
9
+ # qdrant = Vectorsearch::Qdrant.new(...)
10
+ # path = Langchain.root.join("path/to/my.txt")
11
+ # qdrant.add_data(path: path)
12
+ #
13
+
14
+ def loadable?
15
+ @path.to_s.end_with?(".txt")
16
+ end
17
+
18
+ def load
19
+ @path.read
20
+ end
21
+ end
22
+ end
data/lib/tool/base.rb CHANGED
@@ -12,8 +12,7 @@ module Tool
12
12
  TOOLS = {
13
13
  "calculator" => "Tool::Calculator",
14
14
  "search" => "Tool::SerpApi",
15
- "wikipedia" => "Tool::Wikipedia",
16
- "news" => "Tool::News"
15
+ "wikipedia" => "Tool::Wikipedia"
17
16
  }
18
17
 
19
18
  def self.description(value)
@@ -19,24 +19,37 @@ module Vectorsearch
19
19
  @llm_api_key = llm_api_key
20
20
 
21
21
  @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
22
+
23
+ @loaders = Langchain.default_loaders
22
24
  end
23
25
 
26
+ # Method supported by Vectorsearch DB to create a default schema
24
27
  def create_default_schema
25
- raise NotImplementedError
28
+ raise NotImplementedError, "#{self.class.name} does not support creating a default schema"
29
+ end
30
+
31
+ # Method supported by Vectorsearch DB to add a list of texts to the index
32
+ def add_texts(...)
33
+ raise NotImplementedError, "#{self.class.name} does not support adding texts"
26
34
  end
27
35
 
28
- def add_texts(texts:)
29
- raise NotImplementedError
36
+ # Method supported by Vectorsearch DB to search for similar texts in the index
37
+ def similarity_search(...)
38
+ raise NotImplementedError, "#{self.class.name} does not support similarity search"
30
39
  end
31
40
 
32
- # NotImplementedError will be raised if the subclass does not implement this method
33
- def ask(question:)
34
- raise NotImplementedError
41
+ # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
42
+ # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
43
+ def similarity_search_by_vector(...)
44
+ raise NotImplementedError, "#{self.class.name} does not support similarity search by vector"
45
+ end
46
+
47
+ # Method supported by Vectorsearch DB to answer a question given a context (data) pulled from your Vectorsearch DB.
48
+ def ask(...)
49
+ raise NotImplementedError, "#{self.class.name} does not support asking questions"
35
50
  end
36
51
 
37
52
  def_delegators :llm_client,
38
- :generate_embedding,
39
- :generate_completion,
40
53
  :default_dimension
41
54
 
42
55
  def generate_prompt(question:, context:)
@@ -56,5 +69,23 @@ module Vectorsearch
56
69
 
57
70
  prompt_template.format(question: question)
58
71
  end
72
+
73
+ def add_data(path: nil, paths: nil)
74
+ raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
75
+ raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
76
+
77
+ texts =
78
+ Loader
79
+ .with(*loaders)
80
+ .load(path || paths)
81
+
82
+ add_texts(texts: texts)
83
+ end
84
+
85
+ attr_reader :loaders
86
+
87
+ def add_loader(*loaders)
88
+ loaders.each { |loader| @loaders << loader }
89
+ end
59
90
  end
60
91
  end
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ class Chroma < Base
5
+ # Initialize the Chroma client
6
+ # @param url [String] The URL of the Chroma server
7
+ # @param api_key [String] The API key to use
8
+ # @param index_name [String] The name of the index to use
9
+ # @param llm [Symbol] The LLM to use
10
+ # @param llm_api_key [String] The API key for the LLM
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ depends_on "chroma-db"
13
+ require "chroma-db"
14
+
15
+ ::Chroma.connect_host = url
16
+ ::Chroma.logger = Langchain.logger
17
+ ::Chroma.log_level = Langchain.logger.level
18
+
19
+ @index_name = index_name
20
+
21
+ super(llm: llm, llm_api_key: llm_api_key)
22
+ end
23
+
24
+ # Add a list of texts to the index
25
+ # @param texts [Array] The list of texts to add
26
+ # @return [Hash] The response from the server
27
+ def add_texts(texts:)
28
+ embeddings = Array(texts).map do |text|
29
+ ::Chroma::Resources::Embedding.new(
30
+ # TODO: Add support for passing your own IDs
31
+ id: SecureRandom.uuid,
32
+ embedding: llm_client.embed(text: text),
33
+ # TODO: Add support for passing metadata
34
+ metadata: [], # metadatas[index],
35
+ document: text # Do we actually need to store the whole original document?
36
+ )
37
+ end
38
+
39
+ collection = ::Chroma::Resources::Collection.get(index_name)
40
+ collection.add(embeddings)
41
+ end
42
+
43
+ # Create the collection with the default schema
44
+ # @return [Hash] The response from the server
45
+ def create_default_schema
46
+ ::Chroma::Resources::Collection.create(index_name)
47
+ end
48
+
49
+ # Search for similar texts
50
+ # @param query [String] The text to search for
51
+ # @param k [Integer] The number of results to return
52
+ # @return [Chroma::Resources::Embedding] The response from the server
53
+ def similarity_search(
54
+ query:,
55
+ k: 4
56
+ )
57
+ embedding = llm_client.embed(text: query)
58
+
59
+ similarity_search_by_vector(
60
+ embedding: embedding,
61
+ k: k
62
+ )
63
+ end
64
+
65
+ # Search for similar texts by embedding
66
+ # @param embedding [Array] The embedding to search for
67
+ # @param k [Integer] The number of results to return
68
+ # @return [Chroma::Resources::Embedding] The response from the server
69
+ def similarity_search_by_vector(
70
+ embedding:,
71
+ k: 4
72
+ )
73
+ # Requesting more results than the number of documents in the collection currently throws an error in Chroma DB
74
+ # Temporary fix inspired by this comment: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
75
+ count = collection.count
76
+ n_results = [count, k].min
77
+
78
+ collection.query(query_embeddings: [embedding], results: n_results)
79
+ end
80
+
81
+ # Ask a question and return the answer
82
+ # @param question [String] The question to ask
83
+ # @return [String] The answer to the question
84
+ def ask(question:)
85
+ search_results = similarity_search(query: question)
86
+
87
+ context = search_results.map do |result|
88
+ result.document
89
+ end
90
+
91
+ context = context.join("\n---\n")
92
+
93
+ prompt = generate_prompt(question: question, context: context)
94
+
95
+ llm_client.chat(prompt: prompt)
96
+ end
97
+
98
+ private
99
+
100
+ # @return [Chroma::Resources::Collection] The collection
101
+ def collection
102
+ @collection ||= ::Chroma::Resources::Collection.get(index_name)
103
+ end
104
+ end
105
+ end
@@ -15,16 +15,16 @@ module Vectorsearch
15
15
  def add_texts(texts:)
16
16
  client.entities.insert(
17
17
  collection_name: index_name,
18
- num_rows: texts.count,
18
+ num_rows: Array(texts).size,
19
19
  fields_data: [
20
20
  {
21
21
  field_name: "content",
22
22
  type: ::Milvus::DATA_TYPES["varchar"],
23
- field: texts
23
+ field: Array(texts)
24
24
  }, {
25
25
  field_name: "vectors",
26
26
  type: ::Milvus::DATA_TYPES["binary_vector"],
27
- field: texts.map { |text| generate_embedding(text: text) }
27
+ field: Array(texts).map { |text| llm_client.embed(text: text) }
28
28
  }
29
29
  ]
30
30
  )
@@ -69,7 +69,7 @@ module Vectorsearch
69
69
  end
70
70
 
71
71
  def similarity_search(query:, k: 4)
72
- embedding = generate_embedding(text: query)
72
+ embedding = llm_client.embed(text: query)
73
73
 
74
74
  similarity_search_by_vector(
75
75
  embedding: embedding,
@@ -88,9 +88,5 @@ module Vectorsearch
88
88
  metric_type: "L2"
89
89
  )
90
90
  end
91
-
92
- def ask(question:)
93
- raise NotImplementedError
94
- end
95
91
  end
96
92
  end
@@ -32,7 +32,7 @@ module Vectorsearch
32
32
  # TODO: Allows passing in your own IDs
33
33
  id: SecureRandom.uuid,
34
34
  metadata: {content: text},
35
- values: generate_embedding(text: text)
35
+ values: llm_client.embed(text: text)
36
36
  }
37
37
  end
38
38
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -96,7 +96,7 @@ module Vectorsearch
96
96
 
97
97
  prompt = generate_prompt(question: question, context: context)
98
98
 
99
- generate_completion(prompt: prompt)
99
+ llm_client.chat(prompt: prompt)
100
100
  end
101
101
  end
102
102
  end
@@ -27,9 +27,9 @@ module Vectorsearch
27
27
  def add_texts(texts:)
28
28
  batch = {ids: [], vectors: [], payloads: []}
29
29
 
30
- texts.each do |text|
30
+ Array(texts).each do |text|
31
31
  batch[:ids].push(SecureRandom.uuid)
32
- batch[:vectors].push(generate_embedding(text: text))
32
+ batch[:vectors].push(llm_client.embed(text: text))
33
33
  batch[:payloads].push({content: text})
34
34
  end
35
35
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -96,7 +96,7 @@ module Vectorsearch
96
96
 
97
97
  prompt = generate_prompt(question: question, context: context)
98
98
 
99
- generate_completion(prompt: prompt)
99
+ llm_client.chat(prompt: prompt)
100
100
  end
101
101
  end
102
102
  end
@@ -27,7 +27,7 @@ module Vectorsearch
27
27
  # @param texts [Array] The list of texts to add
28
28
  # @return [Hash] The response from the server
29
29
  def add_texts(texts:)
30
- objects = texts.map do |text|
30
+ objects = Array(texts).map do |text|
31
31
  {
32
32
  class: index_name,
33
33
  properties: {content: text}
@@ -113,7 +113,7 @@ module Vectorsearch
113
113
 
114
114
  prompt = generate_prompt(question: question, context: context)
115
115
 
116
- generate_completion(prompt: prompt)
116
+ llm_client.chat(prompt: prompt)
117
117
  end
118
118
  end
119
119
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.6"
4
+ VERSION = "0.3.8"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.6
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-17 00:00:00.000000000 Z
11
+ date: 2023-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.9.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: chroma-db
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.3.0
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: eqn
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -108,6 +122,20 @@ dependencies:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
124
  version: 0.9.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: pdf-reader
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '1.4'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '1.4'
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: pinecone
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -185,6 +213,7 @@ executables: []
185
213
  extensions: []
186
214
  extra_rdoc_files: []
187
215
  files:
216
+ - ".env.example"
188
217
  - ".rspec"
189
218
  - CHANGELOG.md
190
219
  - Gemfile
@@ -192,9 +221,9 @@ files:
192
221
  - LICENSE.txt
193
222
  - README.md
194
223
  - Rakefile
195
- - examples/.keep
196
224
  - examples/create_and_manage_few_shot_prompt_templates.rb
197
225
  - examples/create_and_manage_prompt_templates.rb
226
+ - examples/pdf_store_and_query_with_chroma.rb
198
227
  - examples/store_and_query_with_pinecone.rb
199
228
  - examples/store_and_query_with_qdrant.rb
200
229
  - examples/store_and_query_with_weaviate.rb
@@ -207,7 +236,10 @@ files:
207
236
  - lib/llm/cohere.rb
208
237
  - lib/llm/hugging_face.rb
209
238
  - lib/llm/openai.rb
210
- - lib/logging.rb
239
+ - lib/loader.rb
240
+ - lib/loaders/base.rb
241
+ - lib/loaders/pdf.rb
242
+ - lib/loaders/text.rb
211
243
  - lib/prompt/base.rb
212
244
  - lib/prompt/few_shot_prompt_template.rb
213
245
  - lib/prompt/loading.rb
@@ -217,6 +249,7 @@ files:
217
249
  - lib/tool/serp_api.rb
218
250
  - lib/tool/wikipedia.rb
219
251
  - lib/vectorsearch/base.rb
252
+ - lib/vectorsearch/chroma.rb
220
253
  - lib/vectorsearch/milvus.rb
221
254
  - lib/vectorsearch/pinecone.rb
222
255
  - lib/vectorsearch/qdrant.rb
data/examples/.keep DELETED
File without changes
data/lib/logging.rb DELETED
@@ -1,13 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "logger"
4
-
5
- module Langchain
6
- def self.logger
7
- @@logger ||= Logger.new($stdout, level: :warn, formatter: ->(severity, datetime, progname, msg) { "[LangChain.rb] #{msg}\n" })
8
- end
9
-
10
- def self.logger=(instance)
11
- @@logger = instance
12
- end
13
- end