langchainrb 0.3.8 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
- data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
3
+ metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
4
+ data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
5
5
  SHA512:
6
- metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
- data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
6
+ metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
7
+ data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
data/.env.example CHANGED
@@ -5,6 +5,7 @@ MILVUS_URL=
5
5
  OPENAI_API_KEY=
6
6
  PINECONE_API_KEY=
7
7
  PINECONE_ENVIRONMENT=
8
+ REPLICATE_API_KEY=
8
9
  QDRANT_API_KEY=
9
10
  QDRANT_URL=
10
11
  SERPAPI_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.10] - 2023-05-19
4
+ - 🗣️ LLMs
5
+ - Introducing support for Replicate.com
6
+
7
+ ## [0.3.9] - 2023-05-19
8
+ - 🚚 Loaders
9
+ - Introduce `Loaders::Docx` to parse .docx files
10
+
3
11
  ## [0.3.8] - 2023-05-19
4
12
  - 🔍 Vectorsearch
5
13
  - Introduce support for Chroma DB
data/Gemfile CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
+
14
+ # TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
15
+ # Most likely everything will just need to be updated to `faraday 2.x`
16
+ gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
data/Gemfile.lock CHANGED
@@ -1,7 +1,18 @@
1
+ GIT
2
+ remote: https://github.com/andreibondarev/replicate-ruby.git
3
+ revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
4
+ branch: faraday-1.x
5
+ specs:
6
+ replicate-ruby (0.2.1)
7
+ addressable
8
+ faraday (>= 1.0)
9
+ faraday-multipart
10
+ faraday-retry
11
+
1
12
  PATH
2
13
  remote: .
3
14
  specs:
4
- langchainrb (0.3.8)
15
+ langchainrb (0.3.10)
5
16
 
6
17
  GEM
7
18
  remote: https://rubygems.org/
@@ -35,12 +46,15 @@ GEM
35
46
  dry-monads (~> 1.6)
36
47
  ruby-next-core (>= 0.15.0)
37
48
  coderay (1.1.3)
38
- cohere-ruby (0.9.3)
39
- faraday (~> 1)
40
- faraday_middleware (~> 1)
49
+ cohere-ruby (0.9.4)
50
+ faraday (>= 1.0.0)
51
+ faraday_middleware (>= 1.0.0)
41
52
  concurrent-ruby (1.2.2)
42
53
  crass (1.0.6)
43
54
  diff-lcs (1.5.0)
55
+ docx (0.8.0)
56
+ nokogiri (~> 1.13, >= 1.13.0)
57
+ rubyzip (~> 2.0)
44
58
  dotenv (2.7.6)
45
59
  dotenv-rails (2.7.6)
46
60
  dotenv (= 2.7.6)
@@ -127,8 +141,8 @@ GEM
127
141
  httparty (0.21.0)
128
142
  mini_mime (>= 1.0.0)
129
143
  multi_xml (>= 0.5.2)
130
- hugging-face (0.3.3)
131
- faraday (~> 1.0)
144
+ hugging-face (0.3.4)
145
+ faraday (>= 1.0)
132
146
  i18n (1.13.0)
133
147
  concurrent-ruby (~> 1.0)
134
148
  ice_nine (0.11.2)
@@ -230,6 +244,7 @@ GEM
230
244
  ruby-progressbar (1.13.0)
231
245
  ruby-rc4 (0.1.5)
232
246
  ruby2_keywords (0.0.5)
247
+ rubyzip (2.3.2)
233
248
  standard (1.28.2)
234
249
  language_server-protocol (~> 3.17.0.2)
235
250
  lint_roller (~> 1.0)
@@ -266,11 +281,12 @@ PLATFORMS
266
281
 
267
282
  DEPENDENCIES
268
283
  chroma-db (~> 0.3.0)
269
- cohere-ruby (~> 0.9.3)
284
+ cohere-ruby (~> 0.9.4)
285
+ docx (~> 0.8.0)
270
286
  dotenv-rails (~> 2.7.6)
271
287
  eqn (~> 1.6.5)
272
288
  google_search_results (~> 2.0.0)
273
- hugging-face (~> 0.3.3)
289
+ hugging-face (~> 0.3.4)
274
290
  langchainrb!
275
291
  milvus (~> 0.9.0)
276
292
  pdf-reader (~> 1.4)
@@ -278,6 +294,7 @@ DEPENDENCIES
278
294
  pry-byebug (~> 3.10.0)
279
295
  qdrant-ruby (~> 0.9.0)
280
296
  rake (~> 13.0)
297
+ replicate-ruby!
281
298
  rspec (~> 3.0)
282
299
  ruby-openai (~> 4.0.0)
283
300
  standardrb
data/README.md CHANGED
@@ -28,13 +28,13 @@ require "langchain"
28
28
 
29
29
  #### Supported vector search databases and features:
30
30
 
31
- | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
- | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
- | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
31
+ | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
32
+ | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
33
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
34
+ | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
35
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
36
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
37
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
38
 
39
39
  ### Using Vector Search Databases 🔍
40
40
 
@@ -76,8 +76,9 @@ client.add_texts(
76
76
  # Store the contents of your files in your vector search database
77
77
  my_pdf = Langchain.root.join("path/to/my.pdf")
78
78
  my_text = Langchain.root.join("path/to/my.txt")
79
+ my_docx = Langchain.root.join("path/to/my.docx")
79
80
 
80
- client.add_data(paths: [my_pdf, my_text])
81
+ client.add_data(paths: [my_pdf, my_text, my_docx])
81
82
  ```
82
83
  ```ruby
83
84
  # Retrieve similar documents based on the query string passed in
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
130
131
 
131
132
  #### HuggingFace
132
133
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
134
+ ```ruby
135
+ hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
+ ```
137
+
138
+ #### Replicate
139
+ Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
140
+ ```ruby
141
+ replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
+ ```
133
143
 
134
144
  ### Using Prompts 📋
135
145
 
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
254
264
 
255
265
  | Name | Class | Gem Requirements |
256
266
  | ---- | ------------- | :--------------------------: |
267
+ | docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
257
268
  | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
258
269
  | text | Loaders::Text | |
259
270
 
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
14
14
  # Create the default schema.
15
15
  chroma.create_default_schema
16
16
 
17
+ # gem install these or add them to your Gemfile
18
+ # Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
19
+ # Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
20
+
17
21
  # Set up an array of PDF, TXT and DOCX documents
18
22
  docs = [
19
23
  Langchain.root.join("/docs/document.pdf"),
20
- Langchain.root.join("/docs/document.txt")
24
+ Langchain.root.join("/docs/document.txt"),
25
+ Langchain.root.join("/docs/document.docx")
21
26
  ]
22
27
 
23
28
  # Add data to the index. OpenAI will be used to generate embeddings behind the scenes.
data/lib/langchain.rb CHANGED
@@ -36,6 +36,7 @@ module LLM
36
36
  autoload :Cohere, "llm/cohere"
37
37
  autoload :HuggingFace, "llm/hugging_face"
38
38
  autoload :OpenAI, "llm/openai"
39
+ autoload :Replicate, "llm/replicate"
39
40
  end
40
41
 
41
42
  module Prompt
@@ -55,6 +56,7 @@ end
55
56
 
56
57
  module Loaders
57
58
  autoload :Base, "loaders/base"
59
+ autoload :Docx, "loaders/docx"
58
60
  autoload :PDF, "loaders/pdf"
59
61
  autoload :Text, "loaders/text"
60
62
  end
@@ -62,4 +64,4 @@ end
62
64
  autoload :Loader, "loader"
63
65
 
64
66
  # Load the default Loaders
65
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
67
+ Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
data/lib/llm/base.rb CHANGED
@@ -9,7 +9,8 @@ module LLM
9
9
  LLMS = {
10
10
  openai: "OpenAI",
11
11
  cohere: "Cohere",
12
- huggingface: "HuggingFace"
12
+ huggingface: "HuggingFace",
13
+ replicate: "Replicate"
13
14
  }.freeze
14
15
 
15
16
  def default_dimension
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM
4
+ class Replicate < Base
5
+ # Wrapper around Replicate.com LLM provider
6
+ # Use it directly:
7
+ # replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
8
+ #
9
+ # Or pass it to be instantiated by a vector search DB:
10
+ # chroma = Vectorsearch::Chroma.new(
11
+ # url: ENV["CHROMA_URL"],
12
+ # index_name: "...",
13
+ # llm: :replicate,
14
+ # llm_api_key: ENV["REPLICATE_API_KEY"],
15
+ # )
16
+
17
+ DEFAULTS = {
18
+ # TODO: Figure out how to send the temperature to the API
19
+ temperature: 0.01, # Minimum accepted value
20
+ # TODO: Design the interface to pass and use different models
21
+ completion_model_name: "replicate/vicuna-13b",
22
+ embeddings_model_name: "creatorrr/all-mpnet-base-v2",
23
+ dimension: 384
24
+ }.freeze
25
+
26
+ # Initialize the Replicate LLM
27
+ # @param api_key [String] The API key to use
28
+ def initialize(api_key:)
29
+ depends_on "replicate-ruby"
30
+ require "replicate"
31
+
32
+ ::Replicate.configure do |config|
33
+ config.api_token = api_key
34
+ end
35
+
36
+ @client = ::Replicate.client
37
+ end
38
+
39
+ # Generate an embedding for a given text
40
+ # @param text [String] The text to generate an embedding for
41
+ # @return [Hash] The embedding
42
+ def embed(text:)
43
+ response = embeddings_model.predict(input: text)
44
+
45
+ until response.finished?
46
+ response.refetch
47
+ sleep(1)
48
+ end
49
+
50
+ response.output
51
+ end
52
+
53
+ # Generate a completion for a given prompt
54
+ # @param prompt [String] The prompt to generate a completion for
55
+ # @return [Hash] The completion
56
+ def complete(prompt:, **params)
57
+ response = completion_model.predict(prompt: prompt)
58
+
59
+ until response.finished?
60
+ response.refetch
61
+ sleep(1)
62
+ end
63
+
64
+ # Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
65
+ # The first array element is missing a space at the end, so we add it manually
66
+ response.output[0] += " "
67
+
68
+ response.output.join
69
+ end
70
+
71
+ # Replicate does not have a dedicated chat endpoint, so instead we call `complete()`
72
+ def chat(...)
73
+ complete(...)
74
+ end
75
+
76
+ alias_method :generate_embedding, :embed
77
+
78
+ private
79
+
80
+ def completion_model
81
+ @completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
82
+ end
83
+
84
+ def embeddings_model
85
+ @embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
86
+ end
87
+ end
88
+ end
data/lib/loaders/base.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # TODO: Add chunking options to the loaders
4
+
3
5
  module Loaders
4
6
  class Base
5
7
  def self.load(path)
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loaders
4
+ class Docx < Base
5
+ #
6
+ # This Loader parses Docx files into text.
7
+ # If you'd like to use it directly you can do so like this:
8
+ # Loaders::Docx.new("path/to/my.docx").load
9
+ #
10
+ # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
+ # qdrant = Vectorsearch::Qdrant.new(...)
12
+ # path = Langchain.root.join("path/to/my.docx")
13
+ # qdrant.add_data(paths: [path])
14
+ #
15
+
16
+ def initialize(path)
17
+ depends_on "docx"
18
+ require "docx"
19
+
20
+ @path = path
21
+ end
22
+
23
+ # Check that the file is a `.docx` file
24
+ def loadable?
25
+ @path.to_s.end_with?(".docx")
26
+ end
27
+
28
+ def load
29
+ ::Docx::Document
30
+ .open(@path.to_s)
31
+ .text
32
+ end
33
+ end
34
+ end
data/lib/loaders/pdf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class PDF < Base
3
5
  #
data/lib/loaders/text.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class Text < Base
3
5
  #
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.8"
4
+ VERSION = "0.3.10"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -38,34 +38,48 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.10.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: chroma-db
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.3.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.3.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: cohere-ruby
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: 0.9.3
61
+ version: 0.9.4
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: 0.9.3
68
+ version: 0.9.4
55
69
  - !ruby/object:Gem::Dependency
56
- name: chroma-db
70
+ name: docx
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 0.3.0
75
+ version: 0.8.0
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 0.3.0
82
+ version: 0.8.0
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: eqn
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -100,14 +114,14 @@ dependencies:
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: 0.3.3
117
+ version: 0.3.4
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: 0.3.3
124
+ version: 0.3.4
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: milvus
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +164,20 @@ dependencies:
150
164
  - - "~>"
151
165
  - !ruby/object:Gem::Version
152
166
  version: 0.1.6
167
+ - !ruby/object:Gem::Dependency
168
+ name: replicate-ruby
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
153
181
  - !ruby/object:Gem::Dependency
154
182
  name: qdrant-ruby
155
183
  requirement: !ruby/object:Gem::Requirement
@@ -236,8 +264,10 @@ files:
236
264
  - lib/llm/cohere.rb
237
265
  - lib/llm/hugging_face.rb
238
266
  - lib/llm/openai.rb
267
+ - lib/llm/replicate.rb
239
268
  - lib/loader.rb
240
269
  - lib/loaders/base.rb
270
+ - lib/loaders/docx.rb
241
271
  - lib/loaders/pdf.rb
242
272
  - lib/loaders/text.rb
243
273
  - lib/prompt/base.rb