langchainrb 0.3.8 → 0.3.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
- data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
3
+ metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
4
+ data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
5
5
  SHA512:
6
- metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
- data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
6
+ metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
7
+ data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
data/.env.example CHANGED
@@ -5,6 +5,7 @@ MILVUS_URL=
5
5
  OPENAI_API_KEY=
6
6
  PINECONE_API_KEY=
7
7
  PINECONE_ENVIRONMENT=
8
+ REPLICATE_API_KEY=
8
9
  QDRANT_API_KEY=
9
10
  QDRANT_URL=
10
11
  SERPAPI_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.10] - 2023-05-19
4
+ - 🗣️ LLMs
5
+ - Introducing support for Replicate.com
6
+
7
+ ## [0.3.9] - 2023-05-19
8
+ - 🚚 Loaders
9
+ - Introduce `Loaders::Docx` to parse .docx files
10
+
3
11
  ## [0.3.8] - 2023-05-19
4
12
  - 🔍 Vectorsearch
5
13
  - Introduce support for Chroma DB
data/Gemfile CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
+
14
+ # TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
15
+ # Most likely everything will just need to be updated to `faraday 2.x`
16
+ gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
data/Gemfile.lock CHANGED
@@ -1,7 +1,18 @@
1
+ GIT
2
+ remote: https://github.com/andreibondarev/replicate-ruby.git
3
+ revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
4
+ branch: faraday-1.x
5
+ specs:
6
+ replicate-ruby (0.2.1)
7
+ addressable
8
+ faraday (>= 1.0)
9
+ faraday-multipart
10
+ faraday-retry
11
+
1
12
  PATH
2
13
  remote: .
3
14
  specs:
4
- langchainrb (0.3.8)
15
+ langchainrb (0.3.10)
5
16
 
6
17
  GEM
7
18
  remote: https://rubygems.org/
@@ -35,12 +46,15 @@ GEM
35
46
  dry-monads (~> 1.6)
36
47
  ruby-next-core (>= 0.15.0)
37
48
  coderay (1.1.3)
38
- cohere-ruby (0.9.3)
39
- faraday (~> 1)
40
- faraday_middleware (~> 1)
49
+ cohere-ruby (0.9.4)
50
+ faraday (>= 1.0.0)
51
+ faraday_middleware (>= 1.0.0)
41
52
  concurrent-ruby (1.2.2)
42
53
  crass (1.0.6)
43
54
  diff-lcs (1.5.0)
55
+ docx (0.8.0)
56
+ nokogiri (~> 1.13, >= 1.13.0)
57
+ rubyzip (~> 2.0)
44
58
  dotenv (2.7.6)
45
59
  dotenv-rails (2.7.6)
46
60
  dotenv (= 2.7.6)
@@ -127,8 +141,8 @@ GEM
127
141
  httparty (0.21.0)
128
142
  mini_mime (>= 1.0.0)
129
143
  multi_xml (>= 0.5.2)
130
- hugging-face (0.3.3)
131
- faraday (~> 1.0)
144
+ hugging-face (0.3.4)
145
+ faraday (>= 1.0)
132
146
  i18n (1.13.0)
133
147
  concurrent-ruby (~> 1.0)
134
148
  ice_nine (0.11.2)
@@ -230,6 +244,7 @@ GEM
230
244
  ruby-progressbar (1.13.0)
231
245
  ruby-rc4 (0.1.5)
232
246
  ruby2_keywords (0.0.5)
247
+ rubyzip (2.3.2)
233
248
  standard (1.28.2)
234
249
  language_server-protocol (~> 3.17.0.2)
235
250
  lint_roller (~> 1.0)
@@ -266,11 +281,12 @@ PLATFORMS
266
281
 
267
282
  DEPENDENCIES
268
283
  chroma-db (~> 0.3.0)
269
- cohere-ruby (~> 0.9.3)
284
+ cohere-ruby (~> 0.9.4)
285
+ docx (~> 0.8.0)
270
286
  dotenv-rails (~> 2.7.6)
271
287
  eqn (~> 1.6.5)
272
288
  google_search_results (~> 2.0.0)
273
- hugging-face (~> 0.3.3)
289
+ hugging-face (~> 0.3.4)
274
290
  langchainrb!
275
291
  milvus (~> 0.9.0)
276
292
  pdf-reader (~> 1.4)
@@ -278,6 +294,7 @@ DEPENDENCIES
278
294
  pry-byebug (~> 3.10.0)
279
295
  qdrant-ruby (~> 0.9.0)
280
296
  rake (~> 13.0)
297
+ replicate-ruby!
281
298
  rspec (~> 3.0)
282
299
  ruby-openai (~> 4.0.0)
283
300
  standardrb
data/README.md CHANGED
@@ -28,13 +28,13 @@ require "langchain"
28
28
 
29
29
  #### Supported vector search databases and features:
30
30
 
31
- | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
- | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
- | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
31
+ | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
32
+ | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
33
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
34
+ | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
35
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
36
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
37
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
38
 
39
39
  ### Using Vector Search Databases 🔍
40
40
 
@@ -76,8 +76,9 @@ client.add_texts(
76
76
  # Store the contents of your files in your vector search database
77
77
  my_pdf = Langchain.root.join("path/to/my.pdf")
78
78
  my_text = Langchain.root.join("path/to/my.txt")
79
+ my_docx = Langchain.root.join("path/to/my.docx")
79
80
 
80
- client.add_data(paths: [my_pdf, my_text])
81
+ client.add_data(paths: [my_pdf, my_text, my_docx])
81
82
  ```
82
83
  ```ruby
83
84
  # Retrieve similar documents based on the query string passed in
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
130
131
 
131
132
  #### HuggingFace
132
133
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
134
+ ```ruby
135
+ hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
+ ```
137
+
138
+ #### Replicate
139
+ Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
140
+ ```ruby
141
+ replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
+ ```
133
143
 
134
144
  ### Using Prompts 📋
135
145
 
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
254
264
 
255
265
  | Name | Class | Gem Requirements |
256
266
  | ---- | ------------- | :--------------------------: |
267
+ | docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
257
268
  | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
258
269
  | text | Loaders::Text | |
259
270
 
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
14
14
  # Create the default schema.
15
15
  chroma.create_default_schema
16
16
 
17
+ # gem install these or add them to your Gemfile
18
+ # Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
19
+ # Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
20
+
17
21
  # Set up an array of PDF, TXT and DOCX documents
18
22
  docs = [
19
23
  Langchain.root.join("/docs/document.pdf"),
20
- Langchain.root.join("/docs/document.txt")
24
+ Langchain.root.join("/docs/document.txt"),
25
+ Langchain.root.join("/docs/document.docx")
21
26
  ]
22
27
 
23
28
  # Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scene.
data/lib/langchain.rb CHANGED
@@ -36,6 +36,7 @@ module LLM
36
36
  autoload :Cohere, "llm/cohere"
37
37
  autoload :HuggingFace, "llm/hugging_face"
38
38
  autoload :OpenAI, "llm/openai"
39
+ autoload :Replicate, "llm/replicate"
39
40
  end
40
41
 
41
42
  module Prompt
@@ -55,6 +56,7 @@ end
55
56
 
56
57
  module Loaders
57
58
  autoload :Base, "loaders/base"
59
+ autoload :Docx, "loaders/docx"
58
60
  autoload :PDF, "loaders/pdf"
59
61
  autoload :Text, "loaders/text"
60
62
  end
@@ -62,4 +64,4 @@ end
62
64
  autoload :Loader, "loader"
63
65
 
64
66
  # Load the default Loaders
65
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
67
+ Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
data/lib/llm/base.rb CHANGED
@@ -9,7 +9,8 @@ module LLM
9
9
  LLMS = {
10
10
  openai: "OpenAI",
11
11
  cohere: "Cohere",
12
- huggingface: "HuggingFace"
12
+ huggingface: "HuggingFace",
13
+ replicate: "Replicate"
13
14
  }.freeze
14
15
 
15
16
  def default_dimension
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM
4
+ class Replicate < Base
5
+ # Wrapper around Replicate.com LLM provider
6
+ # Use it directly:
7
+ # replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
8
+ #
9
+ # Or pass it to be instantiated by a vector search DB:
10
+ # chroma = Vectorsearch::Chroma.new(
11
+ # url: ENV["CHROMA_URL"],
12
+ # index_name: "...",
13
+ # llm: :replicate,
14
+ # llm_api_key: ENV["REPLICATE_API_KEY"],
15
+ # )
16
+
17
+ DEFAULTS = {
18
+ # TODO: Figure out how to send the temperature to the API
19
+ temperature: 0.01, # Minimum accepted value
20
+ # TODO: Design the interface to pass and use different models
21
+ completion_model_name: "replicate/vicuna-13b",
22
+ embeddings_model_name: "creatorrr/all-mpnet-base-v2",
23
+ dimension: 384
24
+ }.freeze
25
+
26
+ # Initialize the Replicate LLM
27
+ # @param api_key [String] The API key to use
28
+ def initialize(api_key:)
29
+ depends_on "replicate-ruby"
30
+ require "replicate"
31
+
32
+ ::Replicate.configure do |config|
33
+ config.api_token = api_key
34
+ end
35
+
36
+ @client = ::Replicate.client
37
+ end
38
+
39
+ # Generate an embedding for a given text
40
+ # @param text [String] The text to generate an embedding for
41
+ # @return [Hash] The embedding
42
+ def embed(text:)
43
+ response = embeddings_model.predict(input: text)
44
+
45
+ until response.finished?
46
+ response.refetch
47
+ sleep(1)
48
+ end
49
+
50
+ response.output
51
+ end
52
+
53
+ # Generate a completion for a given prompt
54
+ # @param prompt [String] The prompt to generate a completion for
55
+ # @return [String] The completion
56
+ def complete(prompt:, **params)
57
+ response = completion_model.predict(prompt: prompt)
58
+
59
+ until response.finished?
60
+ response.refetch
61
+ sleep(1)
62
+ end
63
+
64
+ # Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
65
+ # The first array element is missing a space at the end, so we add it manually
66
+ response.output[0] += " "
67
+
68
+ response.output.join
69
+ end
70
+
71
+ # Replicate does not have a dedicated chat endpoint, so instead we call `complete()`
72
+ def chat(...)
73
+ complete(...)
74
+ end
75
+
76
+ alias_method :generate_embedding, :embed
77
+
78
+ private
79
+
80
+ def completion_model
81
+ @completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
82
+ end
83
+
84
+ def embeddings_model
85
+ @embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
86
+ end
87
+ end
88
+ end
data/lib/loaders/base.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # TODO: Add chunking options to the loaders
4
+
3
5
  module Loaders
4
6
  class Base
5
7
  def self.load(path)
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loaders
4
+ class Docx < Base
5
+ #
6
+ # This Loader parses Docx files into text.
7
+ # If you'd like to use it directly you can do so like this:
8
+ # Loaders::Docx.new("path/to/my.docx").load
9
+ #
10
+ # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
+ # qdrant = Vectorsearch::Qdrant.new(...)
12
+ # path = Langchain.root.join("path/to/my.docx")
13
+ # qdrant.add_data(path: path)
14
+ #
15
+
16
+ def initialize(path)
17
+ depends_on "docx"
18
+ require "docx"
19
+
20
+ @path = path
21
+ end
22
+
23
+ # Check that the file is a `.docx` file
24
+ def loadable?
25
+ @path.to_s.end_with?(".docx")
26
+ end
27
+
28
+ def load
29
+ ::Docx::Document
30
+ .open(@path.to_s)
31
+ .text
32
+ end
33
+ end
34
+ end
data/lib/loaders/pdf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class PDF < Base
3
5
  #
data/lib/loaders/text.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class Text < Base
3
5
  #
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.8"
4
+ VERSION = "0.3.10"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -38,34 +38,48 @@ dependencies:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.10.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: chroma-db
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.3.0
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.3.0
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: cohere-ruby
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: 0.9.3
61
+ version: 0.9.4
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: 0.9.3
68
+ version: 0.9.4
55
69
  - !ruby/object:Gem::Dependency
56
- name: chroma-db
70
+ name: docx
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 0.3.0
75
+ version: 0.8.0
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 0.3.0
82
+ version: 0.8.0
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: eqn
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -100,14 +114,14 @@ dependencies:
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: 0.3.3
117
+ version: 0.3.4
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: 0.3.3
124
+ version: 0.3.4
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: milvus
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -150,6 +164,20 @@ dependencies:
150
164
  - - "~>"
151
165
  - !ruby/object:Gem::Version
152
166
  version: 0.1.6
167
+ - !ruby/object:Gem::Dependency
168
+ name: replicate-ruby
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
153
181
  - !ruby/object:Gem::Dependency
154
182
  name: qdrant-ruby
155
183
  requirement: !ruby/object:Gem::Requirement
@@ -236,8 +264,10 @@ files:
236
264
  - lib/llm/cohere.rb
237
265
  - lib/llm/hugging_face.rb
238
266
  - lib/llm/openai.rb
267
+ - lib/llm/replicate.rb
239
268
  - lib/loader.rb
240
269
  - lib/loaders/base.rb
270
+ - lib/loaders/docx.rb
241
271
  - lib/loaders/pdf.rb
242
272
  - lib/loaders/text.rb
243
273
  - lib/prompt/base.rb