langchainrb 0.3.7 → 0.3.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93a3fcc195fbdf55ec52402c1db2f11c929069c03afa90477259e6bf2f542957
4
- data.tar.gz: 737e456d831e40e8c388a1986f2483f9dff3934c8b4e05a9456529e017075637
3
+ metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
+ data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
5
5
  SHA512:
6
- metadata.gz: 23619f8e256a9856eb113afce8eef94f759beb84644a87d9b67cac4fac9d5aedfc06978b3baedad1d988308b3cdb72d32e19a0f5e21d8b8431e8c9ff04eda548
7
- data.tar.gz: 6cd0fcc55553a5472e2ac6c69a6e49dbd4b52fe8bcc899c04710ae40397c39947a3418649737c0d21a99086f85736d52f5968b0cc8af761f186eedf790ba85db
6
+ metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
+ data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
data/.env.example CHANGED
@@ -1,3 +1,4 @@
1
+ CHROMA_URL=
1
2
  COHERE_API_KEY=
2
3
  HUGGING_FACE_API_KEY=
3
4
  MILVUS_URL=
data/CHANGELOG.md CHANGED
@@ -1,19 +1,26 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.8] - 2023-05-19
4
+ - 🔍 Vectorsearch
5
+ - Introduce support for Chroma DB
6
+
7
+ - 🚚 Loaders
8
+ - Bug fix `Loaders::Text` to only parse .txt files
9
+
3
10
  ## [0.3.7] - 2023-05-19
4
- - Loaders
11
+ - 🚚 Loaders
5
12
  - Introduce `Loaders::Text` to parse .txt files
6
- - Introduec `Loaders::PDF` to parse .pdf files
13
+ - Introduce `Loaders::PDF` to parse .pdf files
7
14
 
8
15
  ## [0.3.6] - 2023-05-17
9
- - LLMs
16
+ - 🗣️ LLMs
10
17
  - Bump `hugging-face` gem version
11
18
 
12
19
  ## [0.3.5] - 2023-05-16
13
20
  - Bug fixes
14
21
 
15
22
  ## [0.3.4] - 2023-05-16
16
- - LLMs
23
+ - 🗣️ LLMs
17
24
  - Introducing support for HuggingFace
18
25
 
19
26
  ## [0.3.3] - 2023-05-16
@@ -22,32 +29,28 @@
22
29
  - Use the Ruby logger
23
30
 
24
31
  ## [0.3.2] - 2023-05-15
25
- - Agents
32
+ - 🤖 Agents
26
33
  - Fix Chain of Thought prompt loader
27
34
 
28
35
  ## [0.3.1] - 2023-05-12
29
- - Tools
36
+ - 🛠️ Tools
30
37
  - Introducing `Tool::Wikipedia`, a tool that looks up Wikipedia entries
31
38
 
32
39
  ## [0.3.0] - 2023-05-12
33
-
34
- - Agents
40
+ - 🤖 Agents
35
41
  - Introducing `Agent::ChainOfThoughtAgent`, a semi-autonomous bot that uses Tools to retrieve additional information in order to make best-effort informed replies to user's questions.
36
- - Tools
42
+ - 🛠️ Tools
37
43
  - Introducing `Tool::Calculator` tool that solves mathematical expressions.
38
44
  - Introducing `Tool::Search` tool that executes Google Searches.
39
45
 
40
46
  ## [0.2.0] - 2023-05-09
41
-
42
- - Prompt Templating
47
+ - 📋 Prompt Templating
43
48
  - Ability to create prompt templates and save them to JSON files
44
49
  - Default `Prompt::FewShotPromptTemplate`
45
50
  - New examples added to `examples/`
46
51
 
47
52
  ## [0.1.4] - 2023-05-02
48
-
49
53
  - Backfilling missing specs
50
54
 
51
55
  ## [0.1.3] - 2023-05-01
52
-
53
56
  - Initial release
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.7)
4
+ langchainrb (0.3.8)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -31,6 +31,9 @@ GEM
31
31
  ast (2.4.2)
32
32
  builder (3.2.4)
33
33
  byebug (11.1.3)
34
+ chroma-db (0.3.0)
35
+ dry-monads (~> 1.6)
36
+ ruby-next-core (>= 0.15.0)
34
37
  coderay (1.1.3)
35
38
  cohere-ruby (0.9.3)
36
39
  faraday (~> 1)
@@ -54,6 +57,10 @@ GEM
54
57
  concurrent-ruby (~> 1.0)
55
58
  dry-core (~> 1.0, < 2)
56
59
  zeitwerk (~> 2.6)
60
+ dry-monads (1.6.0)
61
+ concurrent-ruby (~> 1.0)
62
+ dry-core (~> 1.0, < 2)
63
+ zeitwerk (~> 2.6)
57
64
  dry-schema (1.13.1)
58
65
  concurrent-ruby (~> 1.0)
59
66
  dry-configurable (~> 1.0, >= 1.0.1)
@@ -216,6 +223,7 @@ GEM
216
223
  rubocop-performance (1.16.0)
217
224
  rubocop (>= 1.7.0, < 2.0)
218
225
  rubocop-ast (>= 0.4.0)
226
+ ruby-next-core (0.15.3)
219
227
  ruby-openai (4.0.0)
220
228
  faraday (>= 1)
221
229
  faraday-multipart (>= 1)
@@ -253,9 +261,11 @@ GEM
253
261
  PLATFORMS
254
262
  arm64-darwin-22
255
263
  x86_64-darwin-19
264
+ x86_64-darwin-22
256
265
  x86_64-linux
257
266
 
258
267
  DEPENDENCIES
268
+ chroma-db (~> 0.3.0)
259
269
  cohere-ruby (~> 0.9.3)
260
270
  dotenv-rails (~> 2.7.6)
261
271
  eqn (~> 1.6.5)
data/README.md CHANGED
@@ -30,10 +30,11 @@ require "langchain"
30
30
 
31
31
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
32
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
34
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
35
- | Milvus | :white_check_mark: | WIP | WIP | WIP | | |
36
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
33
+ | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
+ | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
+ | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
+ | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
+ | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
38
 
38
39
  ### Using Vector Search Databases 🔍
39
40
 
@@ -54,6 +55,7 @@ client = Vectorsearch::Weaviate.new(
54
55
  client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
55
56
  client = Vectorsearch::Qdrant.new(...) # `gem "qdrant-ruby", "~> 0.9.0"`
56
57
  client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
+ client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
57
59
  ```
58
60
 
59
61
  ```ruby
@@ -255,6 +257,8 @@ Need to read data from various sources? Load it up.
255
257
  | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
256
258
  | text | Loaders::Text | |
257
259
 
260
+ ## Examples
261
+ Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
258
262
 
259
263
  ## Logging
260
264
 
@@ -0,0 +1,36 @@
1
+ require "langchain"
2
+
3
+ # gem install chroma-db
4
+ # or add `gem "chroma-db", "~> 0.3.0"` to your Gemfile
5
+
6
+ # Instantiate the Chroma client
7
+ chroma = Vectorsearch::Chroma.new(
8
+ url: ENV["CHROMA_URL"],
9
+ index_name: "documents",
10
+ llm: :openai,
11
+ llm_api_key: ENV["OPENAI_API_KEY"]
12
+ )
13
+
14
+ # Create the default schema.
15
+ chroma.create_default_schema
16
+
17
+ # Set up an array of PDF and TXT documents
18
+ docs = [
19
+ Langchain.root.join("/docs/document.pdf"),
20
+ Langchain.root.join("/docs/document.txt")
21
+ ]
22
+
23
+ # Add data to the index. OpenAI is used to generate the embeddings behind the scenes.
24
+ chroma.add_texts(
25
+ texts: docs
26
+ )
27
+
28
+ # Query your data
29
+ chroma.similarity_search(
30
+ query: "..."
31
+ )
32
+
33
+ # Interact with your index through Q&A
34
+ chroma.ask(
35
+ question: "..."
36
+ )
data/lib/langchain.rb CHANGED
@@ -24,6 +24,7 @@ end
24
24
 
25
25
  module Vectorsearch
26
26
  autoload :Base, "vectorsearch/base"
27
+ autoload :Chroma, "vectorsearch/chroma"
27
28
  autoload :Milvus, "vectorsearch/milvus"
28
29
  autoload :Pinecone, "vectorsearch/pinecone"
29
30
  autoload :Qdrant, "vectorsearch/qdrant"
data/lib/llm/cohere.rb CHANGED
@@ -51,7 +51,5 @@ module LLM
51
51
  def chat(...)
52
52
  complete(...)
53
53
  end
54
-
55
- alias_method :generate_embedding, :embed
56
54
  end
57
55
  end
data/lib/llm/openai.rb CHANGED
@@ -71,7 +71,5 @@ module LLM
71
71
  response = client.chat(parameters: default_params)
72
72
  response.dig("choices", 0, "message", "content")
73
73
  end
74
-
75
- alias_method :generate_embedding, :embed
76
74
  end
77
75
  end
data/lib/loaders/text.rb CHANGED
@@ -12,7 +12,7 @@ module Loaders
12
12
  #
13
13
 
14
14
  def loadable?
15
- true
15
+ @path.to_s.end_with?(".txt")
16
16
  end
17
17
 
18
18
  def load
@@ -50,7 +50,6 @@ module Vectorsearch
50
50
  end
51
51
 
52
52
  def_delegators :llm_client,
53
- :generate_embedding,
54
53
  :default_dimension
55
54
 
56
55
  def generate_prompt(question:, context:)
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ class Chroma < Base
5
+ # Initialize the Chroma client
6
+ # @param url [String] The URL of the Chroma server
7
+ # @param api_key [String] The API key to use
8
+ # @param index_name [String] The name of the index to use
9
+ # @param llm [Symbol] The LLM to use
10
+ # @param llm_api_key [String] The API key for the LLM
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ depends_on "chroma-db"
13
+ require "chroma-db"
14
+
15
+ ::Chroma.connect_host = url
16
+ ::Chroma.logger = Langchain.logger
17
+ ::Chroma.log_level = Langchain.logger.level
18
+
19
+ @index_name = index_name
20
+
21
+ super(llm: llm, llm_api_key: llm_api_key)
22
+ end
23
+
24
+ # Add a list of texts to the index
25
+ # @param texts [Array] The list of texts to add
26
+ # @return [Hash] The response from the server
27
+ def add_texts(texts:)
28
+ embeddings = Array(texts).map do |text|
29
+ ::Chroma::Resources::Embedding.new(
30
+ # TODO: Add support for passing your own IDs
31
+ id: SecureRandom.uuid,
32
+ embedding: llm_client.embed(text: text),
33
+ # TODO: Add support for passing metadata
34
+ metadata: [], # metadatas[index],
35
+ document: text # Do we actually need to store the whole original document?
36
+ )
37
+ end
38
+
39
+ collection = ::Chroma::Resources::Collection.get(index_name)
40
+ collection.add(embeddings)
41
+ end
42
+
43
+ # Create the collection with the default schema
44
+ # @return [Hash] The response from the server
45
+ def create_default_schema
46
+ ::Chroma::Resources::Collection.create(index_name)
47
+ end
48
+
49
+ # Search for similar texts
50
+ # @param query [String] The text to search for
51
+ # @param k [Integer] The number of results to return
52
+ # @return [Array<Chroma::Resources::Embedding>] The matching embeddings returned by the server
53
+ def similarity_search(
54
+ query:,
55
+ k: 4
56
+ )
57
+ embedding = llm_client.embed(text: query)
58
+
59
+ similarity_search_by_vector(
60
+ embedding: embedding,
61
+ k: k
62
+ )
63
+ end
64
+
65
+ # Search for similar texts by embedding
66
+ # @param embedding [Array] The embedding to search for
67
+ # @param k [Integer] The number of results to return
68
+ # @return [Array<Chroma::Resources::Embedding>] The matching embeddings returned by the server
69
+ def similarity_search_by_vector(
70
+ embedding:,
71
+ k: 4
72
+ )
73
+ # Requesting more results than the number of documents in the collection currently throws an error in Chroma DB
74
+ # Temporary fix inspired by this comment: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
75
+ count = collection.count
76
+ n_results = [count, k].min
77
+
78
+ collection.query(query_embeddings: [embedding], results: n_results)
79
+ end
80
+
81
+ # Ask a question and return the answer
82
+ # @param question [String] The question to ask
83
+ # @return [String] The answer to the question
84
+ def ask(question:)
85
+ search_results = similarity_search(query: question)
86
+
87
+ context = search_results.map do |result|
88
+ result.document
89
+ end
90
+
91
+ context = context.join("\n---\n")
92
+
93
+ prompt = generate_prompt(question: question, context: context)
94
+
95
+ llm_client.chat(prompt: prompt)
96
+ end
97
+
98
+ private
99
+
100
+ # @return [Chroma::Resources::Collection] The collection
101
+ def collection
102
+ @collection ||= ::Chroma::Resources::Collection.get(index_name)
103
+ end
104
+ end
105
+ end
@@ -15,16 +15,16 @@ module Vectorsearch
15
15
  def add_texts(texts:)
16
16
  client.entities.insert(
17
17
  collection_name: index_name,
18
- num_rows: texts.count,
18
+ num_rows: Array(texts).size,
19
19
  fields_data: [
20
20
  {
21
21
  field_name: "content",
22
22
  type: ::Milvus::DATA_TYPES["varchar"],
23
- field: texts
23
+ field: Array(texts)
24
24
  }, {
25
25
  field_name: "vectors",
26
26
  type: ::Milvus::DATA_TYPES["binary_vector"],
27
- field: texts.map { |text| generate_embedding(text: text) }
27
+ field: Array(texts).map { |text| llm_client.embed(text: text) }
28
28
  }
29
29
  ]
30
30
  )
@@ -69,7 +69,7 @@ module Vectorsearch
69
69
  end
70
70
 
71
71
  def similarity_search(query:, k: 4)
72
- embedding = generate_embedding(text: query)
72
+ embedding = llm_client.embed(text: query)
73
73
 
74
74
  similarity_search_by_vector(
75
75
  embedding: embedding,
@@ -32,7 +32,7 @@ module Vectorsearch
32
32
  # TODO: Allows passing in your own IDs
33
33
  id: SecureRandom.uuid,
34
34
  metadata: {content: text},
35
- values: generate_embedding(text: text)
35
+ values: llm_client.embed(text: text)
36
36
  }
37
37
  end
38
38
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -27,9 +27,9 @@ module Vectorsearch
27
27
  def add_texts(texts:)
28
28
  batch = {ids: [], vectors: [], payloads: []}
29
29
 
30
- texts.each do |text|
30
+ Array(texts).each do |text|
31
31
  batch[:ids].push(SecureRandom.uuid)
32
- batch[:vectors].push(generate_embedding(text: text))
32
+ batch[:vectors].push(llm_client.embed(text: text))
33
33
  batch[:payloads].push({content: text})
34
34
  end
35
35
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -27,7 +27,7 @@ module Vectorsearch
27
27
  # @param texts [Array] The list of texts to add
28
28
  # @return [Hash] The response from the server
29
29
  def add_texts(texts:)
30
- objects = texts.map do |text|
30
+ objects = Array(texts).map do |text|
31
31
  {
32
32
  class: index_name,
33
33
  properties: {content: text}
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.7"
4
+ VERSION = "0.3.8"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-19 00:00:00.000000000 Z
11
+ date: 2023-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.9.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: chroma-db
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.3.0
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: eqn
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -207,9 +221,9 @@ files:
207
221
  - LICENSE.txt
208
222
  - README.md
209
223
  - Rakefile
210
- - examples/.keep
211
224
  - examples/create_and_manage_few_shot_prompt_templates.rb
212
225
  - examples/create_and_manage_prompt_templates.rb
226
+ - examples/pdf_store_and_query_with_chroma.rb
213
227
  - examples/store_and_query_with_pinecone.rb
214
228
  - examples/store_and_query_with_qdrant.rb
215
229
  - examples/store_and_query_with_weaviate.rb
@@ -235,6 +249,7 @@ files:
235
249
  - lib/tool/serp_api.rb
236
250
  - lib/tool/wikipedia.rb
237
251
  - lib/vectorsearch/base.rb
252
+ - lib/vectorsearch/chroma.rb
238
253
  - lib/vectorsearch/milvus.rb
239
254
  - lib/vectorsearch/pinecone.rb
240
255
  - lib/vectorsearch/qdrant.rb
data/examples/.keep DELETED
File without changes