langchainrb 0.3.7 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 93a3fcc195fbdf55ec52402c1db2f11c929069c03afa90477259e6bf2f542957
4
- data.tar.gz: 737e456d831e40e8c388a1986f2483f9dff3934c8b4e05a9456529e017075637
3
+ metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
+ data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
5
5
  SHA512:
6
- metadata.gz: 23619f8e256a9856eb113afce8eef94f759beb84644a87d9b67cac4fac9d5aedfc06978b3baedad1d988308b3cdb72d32e19a0f5e21d8b8431e8c9ff04eda548
7
- data.tar.gz: 6cd0fcc55553a5472e2ac6c69a6e49dbd4b52fe8bcc899c04710ae40397c39947a3418649737c0d21a99086f85736d52f5968b0cc8af761f186eedf790ba85db
6
+ metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
+ data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
data/.env.example CHANGED
@@ -1,3 +1,4 @@
1
+ CHROMA_URL=
1
2
  COHERE_API_KEY=
2
3
  HUGGING_FACE_API_KEY=
3
4
  MILVUS_URL=
data/CHANGELOG.md CHANGED
@@ -1,19 +1,26 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.8] - 2023-05-19
4
+ - 🔍 Vectorsearch
5
+ - Introduce support for Chroma DB
6
+
7
+ - 🚚 Loaders
8
+ - Bug fix `Loaders::Text` to only parse .txt files
9
+
3
10
  ## [0.3.7] - 2023-05-19
4
- - Loaders
11
+ - 🚚 Loaders
5
12
  - Introduce `Loaders::Text` to parse .txt files
6
- - Introduec `Loaders::PDF` to parse .pdf files
13
+ - Introduce `Loaders::PDF` to parse .pdf files
7
14
 
8
15
  ## [0.3.6] - 2023-05-17
9
- - LLMs
16
+ - 🗣️ LLMs
10
17
  - Bump `hugging-face` gem version
11
18
 
12
19
  ## [0.3.5] - 2023-05-16
13
20
  - Bug fixes
14
21
 
15
22
  ## [0.3.4] - 2023-05-16
16
- - LLMs
23
+ - 🗣️ LLMs
17
24
  - Introducing support for HuggingFace
18
25
 
19
26
  ## [0.3.3] - 2023-05-16
@@ -22,32 +29,28 @@
22
29
  - Use the Ruby logger
23
30
 
24
31
  ## [0.3.2] - 2023-05-15
25
- - Agents
32
+ - 🤖 Agents
26
33
  - Fix Chain of Thought prompt loader
27
34
 
28
35
  ## [0.3.1] - 2023-05-12
29
- - Tools
36
+ - 🛠️ Tools
30
37
  - Introducing `Tool::Wikipedia`, a tool that looks up Wikipedia entries
31
38
 
32
39
  ## [0.3.0] - 2023-05-12
33
-
34
- - Agents
40
+ - 🤖 Agents
35
41
  - Introducing `Agent::ChainOfThoughtAgent`, a semi-autonomous bot that uses Tools to retrieve additional information in order to make best-effort informed replies to user's questions.
36
- - Tools
42
+ - 🛠️ Tools
37
43
  - Introducing `Tool::Calculator` tool that solves mathematical expressions.
38
44
  - Introducing `Tool::Search` tool that executes Google Searches.
39
45
 
40
46
  ## [0.2.0] - 2023-05-09
41
-
42
- - Prompt Templating
47
+ - 📋 Prompt Templating
43
48
  - Ability to create prompt templates and save them to JSON files
44
49
  - Default `Prompt::FewShotPromptTemplate`
45
50
  - New examples added to `examples/`
46
51
 
47
52
  ## [0.1.4] - 2023-05-02
48
-
49
53
  - Backfilling missing specs
50
54
 
51
55
  ## [0.1.3] - 2023-05-01
52
-
53
56
  - Initial release
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.7)
4
+ langchainrb (0.3.8)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -31,6 +31,9 @@ GEM
31
31
  ast (2.4.2)
32
32
  builder (3.2.4)
33
33
  byebug (11.1.3)
34
+ chroma-db (0.3.0)
35
+ dry-monads (~> 1.6)
36
+ ruby-next-core (>= 0.15.0)
34
37
  coderay (1.1.3)
35
38
  cohere-ruby (0.9.3)
36
39
  faraday (~> 1)
@@ -54,6 +57,10 @@ GEM
54
57
  concurrent-ruby (~> 1.0)
55
58
  dry-core (~> 1.0, < 2)
56
59
  zeitwerk (~> 2.6)
60
+ dry-monads (1.6.0)
61
+ concurrent-ruby (~> 1.0)
62
+ dry-core (~> 1.0, < 2)
63
+ zeitwerk (~> 2.6)
57
64
  dry-schema (1.13.1)
58
65
  concurrent-ruby (~> 1.0)
59
66
  dry-configurable (~> 1.0, >= 1.0.1)
@@ -216,6 +223,7 @@ GEM
216
223
  rubocop-performance (1.16.0)
217
224
  rubocop (>= 1.7.0, < 2.0)
218
225
  rubocop-ast (>= 0.4.0)
226
+ ruby-next-core (0.15.3)
219
227
  ruby-openai (4.0.0)
220
228
  faraday (>= 1)
221
229
  faraday-multipart (>= 1)
@@ -253,9 +261,11 @@ GEM
253
261
  PLATFORMS
254
262
  arm64-darwin-22
255
263
  x86_64-darwin-19
264
+ x86_64-darwin-22
256
265
  x86_64-linux
257
266
 
258
267
  DEPENDENCIES
268
+ chroma-db (~> 0.3.0)
259
269
  cohere-ruby (~> 0.9.3)
260
270
  dotenv-rails (~> 2.7.6)
261
271
  eqn (~> 1.6.5)
data/README.md CHANGED
@@ -30,10 +30,11 @@ require "langchain"
30
30
 
31
31
  | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
32
  | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | | |
34
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | | |
35
- | Milvus | :white_check_mark: | WIP | WIP | WIP | | |
36
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | | |
33
+ | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
+ | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
+ | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
+ | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
+ | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
38
 
38
39
  ### Using Vector Search Databases 🔍
39
40
 
@@ -54,6 +55,7 @@ client = Vectorsearch::Weaviate.new(
54
55
  client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
55
56
  client = Vectorsearch::Qdrant.new(...) # `gem "qdrant-ruby", "~> 0.9.0"`
56
57
  client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
58
+ client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
57
59
  ```
58
60
 
59
61
  ```ruby
@@ -255,6 +257,8 @@ Need to read data from various sources? Load it up.
255
257
  | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
256
258
  | text | Loaders::Text | |
257
259
 
260
+ ## Examples
261
+ Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
258
262
 
259
263
  ## Logging
260
264
 
@@ -0,0 +1,36 @@
1
+ require "langchain"
2
+
3
+ # gem install chroma-db
4
+ # or add `gem "chroma-db", "~> 0.3.0"` to your Gemfile
5
+
6
+ # Instantiate the Chroma client
7
+ chroma = Vectorsearch::Chroma.new(
8
+ url: ENV["CHROMA_URL"],
9
+ index_name: "documents",
10
+ llm: :openai,
11
+ llm_api_key: ENV["OPENAI_API_KEY"]
12
+ )
13
+
14
+ # Create the default schema.
15
+ chroma.create_default_schema
16
+
17
+ # Set up an array of PDF and TXT documents
18
+ docs = [
19
+ Langchain.root.join("/docs/document.pdf"),
20
+ Langchain.root.join("/docs/document.txt")
21
+ ]
22
+
23
+ # Add data to the index. OpenAI is used to generate the embeddings behind the scenes.
24
+ chroma.add_texts(
25
+ texts: docs
26
+ )
27
+
28
+ # Query your data
29
+ chroma.similarity_search(
30
+ query: "..."
31
+ )
32
+
33
+ # Interact with your index through Q&A
34
+ chroma.ask(
35
+ question: "..."
36
+ )
data/lib/langchain.rb CHANGED
@@ -24,6 +24,7 @@ end
24
24
 
25
25
  module Vectorsearch
26
26
  autoload :Base, "vectorsearch/base"
27
+ autoload :Chroma, "vectorsearch/chroma"
27
28
  autoload :Milvus, "vectorsearch/milvus"
28
29
  autoload :Pinecone, "vectorsearch/pinecone"
29
30
  autoload :Qdrant, "vectorsearch/qdrant"
data/lib/llm/cohere.rb CHANGED
@@ -51,7 +51,5 @@ module LLM
51
51
  def chat(...)
52
52
  complete(...)
53
53
  end
54
-
55
- alias_method :generate_embedding, :embed
56
54
  end
57
55
  end
data/lib/llm/openai.rb CHANGED
@@ -71,7 +71,5 @@ module LLM
71
71
  response = client.chat(parameters: default_params)
72
72
  response.dig("choices", 0, "message", "content")
73
73
  end
74
-
75
- alias_method :generate_embedding, :embed
76
74
  end
77
75
  end
data/lib/loaders/text.rb CHANGED
@@ -12,7 +12,7 @@ module Loaders
12
12
  #
13
13
 
14
14
  def loadable?
15
- true
15
+ @path.to_s.end_with?(".txt")
16
16
  end
17
17
 
18
18
  def load
@@ -50,7 +50,6 @@ module Vectorsearch
50
50
  end
51
51
 
52
52
  def_delegators :llm_client,
53
- :generate_embedding,
54
53
  :default_dimension
55
54
 
56
55
  def generate_prompt(question:, context:)
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Vectorsearch
4
+ class Chroma < Base
5
+ # Initialize the Chroma client
6
+ # @param url [String] The URL of the Chroma server
7
+ # @param api_key [String] The API key to use (accepted but currently unused by this initializer)
8
+ # @param index_name [String] The name of the index to use
9
+ # @param llm [Symbol] The LLM to use
10
+ # @param llm_api_key [String] The API key for the LLM
11
+ def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
12
+ depends_on "chroma-db"
13
+ require "chroma-db"
14
+
15
+ ::Chroma.connect_host = url
16
+ ::Chroma.logger = Langchain.logger
17
+ ::Chroma.log_level = Langchain.logger.level
18
+
19
+ @index_name = index_name
20
+
21
+ super(llm: llm, llm_api_key: llm_api_key)
22
+ end
23
+
24
+ # Add a list of texts to the index
25
+ # @param texts [Array] The list of texts to add
26
+ # @return [Hash] The response from the server
27
+ def add_texts(texts:)
28
+ embeddings = Array(texts).map do |text|
29
+ ::Chroma::Resources::Embedding.new(
30
+ # TODO: Add support for passing your own IDs
31
+ id: SecureRandom.uuid,
32
+ embedding: llm_client.embed(text: text),
33
+ # TODO: Add support for passing metadata
34
+ metadata: [], # metadatas[index],
35
+ document: text # Do we actually need to store the whole original document?
36
+ )
37
+ end
38
+
39
+ collection = ::Chroma::Resources::Collection.get(index_name)
40
+ collection.add(embeddings)
41
+ end
42
+
43
+ # Create the collection with the default schema
44
+ # @return [Hash] The response from the server
45
+ def create_default_schema
46
+ ::Chroma::Resources::Collection.create(index_name)
47
+ end
48
+
49
+ # Search for similar texts
50
+ # @param query [String] The text to search for
51
+ # @param k [Integer] The number of results to return
52
+ # @return [Chroma::Resources::Embedding] The response from the server
53
+ def similarity_search(
54
+ query:,
55
+ k: 4
56
+ )
57
+ embedding = llm_client.embed(text: query)
58
+
59
+ similarity_search_by_vector(
60
+ embedding: embedding,
61
+ k: k
62
+ )
63
+ end
64
+
65
+ # Search for similar texts by embedding
66
+ # @param embedding [Array] The embedding to search for
67
+ # @param k [Integer] The number of results to return
68
+ # @return [Chroma::Resources::Embedding] The response from the server
69
+ def similarity_search_by_vector(
70
+ embedding:,
71
+ k: 4
72
+ )
73
+ # Requesting more results than the number of documents in the collection currently throws an error in Chroma DB
74
+ # Temporary fix inspired by this comment: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
75
+ count = collection.count
76
+ n_results = [count, k].min
77
+
78
+ collection.query(query_embeddings: [embedding], results: n_results)
79
+ end
80
+
81
+ # Ask a question and return the answer
82
+ # @param question [String] The question to ask
83
+ # @return [String] The answer to the question
84
+ def ask(question:)
85
+ search_results = similarity_search(query: question)
86
+
87
+ context = search_results.map do |result|
88
+ result.document
89
+ end
90
+
91
+ context = context.join("\n---\n")
92
+
93
+ prompt = generate_prompt(question: question, context: context)
94
+
95
+ llm_client.chat(prompt: prompt)
96
+ end
97
+
98
+ private
99
+
100
+ # @return [Chroma::Resources::Collection] The collection
101
+ def collection
102
+ @collection ||= ::Chroma::Resources::Collection.get(index_name)
103
+ end
104
+ end
105
+ end
@@ -15,16 +15,16 @@ module Vectorsearch
15
15
  def add_texts(texts:)
16
16
  client.entities.insert(
17
17
  collection_name: index_name,
18
- num_rows: texts.count,
18
+ num_rows: Array(texts).size,
19
19
  fields_data: [
20
20
  {
21
21
  field_name: "content",
22
22
  type: ::Milvus::DATA_TYPES["varchar"],
23
- field: texts
23
+ field: Array(texts)
24
24
  }, {
25
25
  field_name: "vectors",
26
26
  type: ::Milvus::DATA_TYPES["binary_vector"],
27
- field: texts.map { |text| generate_embedding(text: text) }
27
+ field: Array(texts).map { |text| llm_client.embed(text: text) }
28
28
  }
29
29
  ]
30
30
  )
@@ -69,7 +69,7 @@ module Vectorsearch
69
69
  end
70
70
 
71
71
  def similarity_search(query:, k: 4)
72
- embedding = generate_embedding(text: query)
72
+ embedding = llm_client.embed(text: query)
73
73
 
74
74
  similarity_search_by_vector(
75
75
  embedding: embedding,
@@ -32,7 +32,7 @@ module Vectorsearch
32
32
  # TODO: Allows passing in your own IDs
33
33
  id: SecureRandom.uuid,
34
34
  metadata: {content: text},
35
- values: generate_embedding(text: text)
35
+ values: llm_client.embed(text: text)
36
36
  }
37
37
  end
38
38
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -27,9 +27,9 @@ module Vectorsearch
27
27
  def add_texts(texts:)
28
28
  batch = {ids: [], vectors: [], payloads: []}
29
29
 
30
- texts.each do |text|
30
+ Array(texts).each do |text|
31
31
  batch[:ids].push(SecureRandom.uuid)
32
- batch[:vectors].push(generate_embedding(text: text))
32
+ batch[:vectors].push(llm_client.embed(text: text))
33
33
  batch[:payloads].push({content: text})
34
34
  end
35
35
 
@@ -59,7 +59,7 @@ module Vectorsearch
59
59
  query:,
60
60
  k: 4
61
61
  )
62
- embedding = generate_embedding(text: query)
62
+ embedding = llm_client.embed(text: query)
63
63
 
64
64
  similarity_search_by_vector(
65
65
  embedding: embedding,
@@ -27,7 +27,7 @@ module Vectorsearch
27
27
  # @param texts [Array] The list of texts to add
28
28
  # @return [Hash] The response from the server
29
29
  def add_texts(texts:)
30
- objects = texts.map do |text|
30
+ objects = Array(texts).map do |text|
31
31
  {
32
32
  class: index_name,
33
33
  properties: {content: text}
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.7"
4
+ VERSION = "0.3.8"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-19 00:00:00.000000000 Z
11
+ date: 2023-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.9.3
55
+ - !ruby/object:Gem::Dependency
56
+ name: chroma-db
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.0
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.3.0
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: eqn
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -207,9 +221,9 @@ files:
207
221
  - LICENSE.txt
208
222
  - README.md
209
223
  - Rakefile
210
- - examples/.keep
211
224
  - examples/create_and_manage_few_shot_prompt_templates.rb
212
225
  - examples/create_and_manage_prompt_templates.rb
226
+ - examples/pdf_store_and_query_with_chroma.rb
213
227
  - examples/store_and_query_with_pinecone.rb
214
228
  - examples/store_and_query_with_qdrant.rb
215
229
  - examples/store_and_query_with_weaviate.rb
@@ -235,6 +249,7 @@ files:
235
249
  - lib/tool/serp_api.rb
236
250
  - lib/tool/wikipedia.rb
237
251
  - lib/vectorsearch/base.rb
252
+ - lib/vectorsearch/chroma.rb
238
253
  - lib/vectorsearch/milvus.rb
239
254
  - lib/vectorsearch/pinecone.rb
240
255
  - lib/vectorsearch/qdrant.rb
data/examples/.keep DELETED
File without changes