langchainrb 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa4976937d6ccf9b2c0e1f0e0d98f53a98f21f0d9d1abbe85d6cc83b646cb8d7
4
- data.tar.gz: e563cf1aaece650c2cbe2727b06b15c2e284276c78f1e820070d566a6d3b7386
3
+ metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
4
+ data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
5
5
  SHA512:
6
- metadata.gz: 70e086ce4951b95f1f232b436354f2566caedb0d200b55e3f7ed3bd9589043b06053e78198f97ed47335825edae528e6559b99378899b2c435d5f4baa66f137b
7
- data.tar.gz: 75a358762a36f9d37881fe9b265764cb4b8f9578c6598dd38bd3f7323b919e90c1531b7dda769b687e29eb595ec7852cc5698254a80dd5a5a334929894e783df
6
+ metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
7
+ data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
data/.env.example CHANGED
@@ -5,6 +5,7 @@ MILVUS_URL=
5
5
  OPENAI_API_KEY=
6
6
  PINECONE_API_KEY=
7
7
  PINECONE_ENVIRONMENT=
8
+ REPLICATE_API_KEY=
8
9
  QDRANT_API_KEY=
9
10
  QDRANT_URL=
10
11
  SERPAPI_API_KEY=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.10] - 2023-05-19
4
+ - 🗣️ LLMs
5
+ - Introducing support for Replicate.com
6
+
3
7
  ## [0.3.9] - 2023-05-19
4
8
  - 🚚 Loaders
5
9
  - Introduce `Loaders::Docx` to parse .docx files
data/Gemfile CHANGED
@@ -11,6 +11,6 @@ gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
13
 
14
- # TODO: Remove this once a new version of the gem is released
15
- # The new version of `docx` needs to include this fix: https://github.com/ruby-docx/docx/pull/130
16
- gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"
14
+ # TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
15
+ # Most likely everything will just need to be updated to `faraday 2.x`
16
+ gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
data/Gemfile.lock CHANGED
@@ -1,16 +1,18 @@
1
1
  GIT
2
- remote: https://github.com/ruby-docx/docx.git
3
- revision: 08b734da349d980d2d7549b907459a9e2aa5d3bb
4
- branch: master
2
+ remote: https://github.com/andreibondarev/replicate-ruby.git
3
+ revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
4
+ branch: faraday-1.x
5
5
  specs:
6
- docx (0.7.0)
7
- nokogiri (~> 1.13, >= 1.13.0)
8
- rubyzip (~> 2.0)
6
+ replicate-ruby (0.2.1)
7
+ addressable
8
+ faraday (>= 1.0)
9
+ faraday-multipart
10
+ faraday-retry
9
11
 
10
12
  PATH
11
13
  remote: .
12
14
  specs:
13
- langchainrb (0.3.9)
15
+ langchainrb (0.3.10)
14
16
 
15
17
  GEM
16
18
  remote: https://rubygems.org/
@@ -44,12 +46,15 @@ GEM
44
46
  dry-monads (~> 1.6)
45
47
  ruby-next-core (>= 0.15.0)
46
48
  coderay (1.1.3)
47
- cohere-ruby (0.9.3)
48
- faraday (~> 1)
49
- faraday_middleware (~> 1)
49
+ cohere-ruby (0.9.4)
50
+ faraday (>= 1.0.0)
51
+ faraday_middleware (>= 1.0.0)
50
52
  concurrent-ruby (1.2.2)
51
53
  crass (1.0.6)
52
54
  diff-lcs (1.5.0)
55
+ docx (0.8.0)
56
+ nokogiri (~> 1.13, >= 1.13.0)
57
+ rubyzip (~> 2.0)
53
58
  dotenv (2.7.6)
54
59
  dotenv-rails (2.7.6)
55
60
  dotenv (= 2.7.6)
@@ -136,8 +141,8 @@ GEM
136
141
  httparty (0.21.0)
137
142
  mini_mime (>= 1.0.0)
138
143
  multi_xml (>= 0.5.2)
139
- hugging-face (0.3.3)
140
- faraday (~> 1.0)
144
+ hugging-face (0.3.4)
145
+ faraday (>= 1.0)
141
146
  i18n (1.13.0)
142
147
  concurrent-ruby (~> 1.0)
143
148
  ice_nine (0.11.2)
@@ -276,12 +281,12 @@ PLATFORMS
276
281
 
277
282
  DEPENDENCIES
278
283
  chroma-db (~> 0.3.0)
279
- cohere-ruby (~> 0.9.3)
280
- docx!
284
+ cohere-ruby (~> 0.9.4)
285
+ docx (~> 0.8.0)
281
286
  dotenv-rails (~> 2.7.6)
282
287
  eqn (~> 1.6.5)
283
288
  google_search_results (~> 2.0.0)
284
- hugging-face (~> 0.3.3)
289
+ hugging-face (~> 0.3.4)
285
290
  langchainrb!
286
291
  milvus (~> 0.9.0)
287
292
  pdf-reader (~> 1.4)
@@ -289,6 +294,7 @@ DEPENDENCIES
289
294
  pry-byebug (~> 3.10.0)
290
295
  qdrant-ruby (~> 0.9.0)
291
296
  rake (~> 13.0)
297
+ replicate-ruby!
292
298
  rspec (~> 3.0)
293
299
  ruby-openai (~> 4.0.0)
294
300
  standardrb
data/README.md CHANGED
@@ -28,13 +28,13 @@ require "langchain"
28
28
 
29
29
  #### Supported vector search databases and features:
30
30
 
31
- | Database | Querying | Storage | Schema Management | Backups | Rails Integration | ??? |
32
- | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:| ---:|
33
- | Chroma | :white_check_mark: | WIP | WIP | WIP | WIP | |
34
- | Milvus | :white_check_mark: | WIP | WIP | WIP | WIP | |
35
- | Pinecone | :white_check_mark: | WIP | WIP | WIP | WIP | |
36
- | Qdrant | :white_check_mark: | WIP | WIP | WIP | WIP | |
37
- | Weaviate | :white_check_mark: | WIP | WIP | WIP | WIP | |
31
+ | Database | Querying | Storage | Schema Management | Backups | Rails Integration |
32
+ | -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
33
+ | [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
34
+ | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
35
+ | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
36
+ | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
37
+ | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
38
38
 
39
39
  ### Using Vector Search Databases 🔍
40
40
 
@@ -76,8 +76,9 @@ client.add_texts(
76
76
  # Store the contents of your files in your vector search database
77
77
  my_pdf = Langchain.root.join("path/to/my.pdf")
78
78
  my_text = Langchain.root.join("path/to/my.txt")
79
+ my_docx = Langchain.root.join("path/to/my.docx")
79
80
 
80
- client.add_data(paths: [my_pdf, my_text])
81
+ client.add_data(paths: [my_pdf, my_text, my_docx])
81
82
  ```
82
83
  ```ruby
83
84
  # Retrieve similar documents based on the query string passed in
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
130
131
 
131
132
  #### HuggingFace
132
133
  Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
134
+ ```ruby
135
+ hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
136
+ ```
137
+
138
+ #### Replicate
139
+ Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
140
+ ```ruby
141
+ replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
142
+ ```
133
143
 
134
144
  ### Using Prompts 📋
135
145
 
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
254
264
 
255
265
  | Name | Class | Gem Requirements |
256
266
  | ---- | ------------- | :--------------------------: |
267
+ | docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
257
268
  | pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
258
269
  | text | Loaders::Text | |
259
270
 
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
14
14
  # Create the default schema.
15
15
  chroma.create_default_schema
16
16
 
17
+ # gem install these or add them to your Gemfile
18
+ # Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
19
+ # Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
20
+
17
21
  # Set up an array of PDF, TXT and DOCX documents
18
22
  docs = [
19
23
  Langchain.root.join("/docs/document.pdf"),
20
- Langchain.root.join("/docs/document.txt")
24
+ Langchain.root.join("/docs/document.txt"),
25
+ Langchain.root.join("/docs/document.docx")
21
26
  ]
22
27
 
23
28
  # Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scene.
data/lib/langchain.rb CHANGED
@@ -36,6 +36,7 @@ module LLM
36
36
  autoload :Cohere, "llm/cohere"
37
37
  autoload :HuggingFace, "llm/hugging_face"
38
38
  autoload :OpenAI, "llm/openai"
39
+ autoload :Replicate, "llm/replicate"
39
40
  end
40
41
 
41
42
  module Prompt
data/lib/llm/base.rb CHANGED
@@ -9,7 +9,8 @@ module LLM
9
9
  LLMS = {
10
10
  openai: "OpenAI",
11
11
  cohere: "Cohere",
12
- huggingface: "HuggingFace"
12
+ huggingface: "HuggingFace",
13
+ replicate: "Replicate"
13
14
  }.freeze
14
15
 
15
16
  def default_dimension
@@ -0,0 +1,88 @@
1
+ # frozen_string_literal: true
2
+
3
+ module LLM
4
+ class Replicate < Base
5
+ # Wrapper around Replicate.com LLM provider
6
+ # Use it directly:
7
+ # replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
8
+ #
9
+ # Or pass it to be instantiated by a vector search DB:
10
+ # chroma = Vectorsearch::Chroma.new(
11
+ # url: ENV["CHROMA_URL"],
12
+ # index_name: "...",
13
+ # llm: :replicate,
14
+ # llm_api_key: ENV["REPLICATE_API_KEY"],
15
+ # )
16
+
17
+ DEFAULTS = {
18
+ # TODO: Figure out how to send the temperature to the API
19
+ temperature: 0.01, # Minimum accepted value
20
+ # TODO: Design the interface to pass and use different models
21
+ completion_model_name: "replicate/vicuna-13b",
22
+ embeddings_model_name: "creatorrr/all-mpnet-base-v2",
23
+ dimension: 384
24
+ }.freeze
25
+
26
+ # Initialize the Replicate LLM
27
+ # @param api_key [String] The API key to use
28
+ def initialize(api_key:)
29
+ depends_on "replicate-ruby"
30
+ require "replicate"
31
+
32
+ ::Replicate.configure do |config|
33
+ config.api_token = api_key
34
+ end
35
+
36
+ @client = ::Replicate.client
37
+ end
38
+
39
+ # Generate an embedding for a given text
40
+ # @param text [String] The text to generate an embedding for
41
+ # @return [Hash] The embedding
42
+ def embed(text:)
43
+ response = embeddings_model.predict(input: text)
44
+
45
+ until response.finished?
46
+ response.refetch
47
+ sleep(1)
48
+ end
49
+
50
+ response.output
51
+ end
52
+
53
+ # Generate a completion for a given prompt
54
+ # @param prompt [String] The prompt to generate a completion for
55
+ # @return [String] The completion
56
+ def complete(prompt:, **params)
57
+ response = completion_model.predict(prompt: prompt)
58
+
59
+ until response.finished?
60
+ response.refetch
61
+ sleep(1)
62
+ end
63
+
64
+ # Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
65
+ # The first array element is missing a space at the end, so we add it manually
66
+ response.output[0] += " "
67
+
68
+ response.output.join
69
+ end
70
+
71
+ # Replicate does not have a dedicated chat endpoint, so instead we call `complete()`
72
+ def chat(...)
73
+ complete(...)
74
+ end
75
+
76
+ alias_method :generate_embedding, :embed
77
+
78
+ private
79
+
80
+ def completion_model
81
+ @completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
82
+ end
83
+
84
+ def embeddings_model
85
+ @embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
86
+ end
87
+ end
88
+ end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.9"
4
+ VERSION = "0.3.10"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.9
4
+ version: 0.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-20 00:00:00.000000000 Z
11
+ date: 2023-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -39,47 +39,47 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: 3.10.0
41
41
  - !ruby/object:Gem::Dependency
42
- name: cohere-ruby
42
+ name: chroma-db
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: 0.9.3
47
+ version: 0.3.0
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: 0.9.3
54
+ version: 0.3.0
55
55
  - !ruby/object:Gem::Dependency
56
- name: chroma-db
56
+ name: cohere-ruby
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.3.0
61
+ version: 0.9.4
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.3.0
68
+ version: 0.9.4
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: docx
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: 0.8.0
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: 0.8.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: eqn
85
85
  requirement: !ruby/object:Gem::Requirement
@@ -114,14 +114,14 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 0.3.3
117
+ version: 0.3.4
118
118
  type: :development
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 0.3.3
124
+ version: 0.3.4
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: milvus
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -164,6 +164,20 @@ dependencies:
164
164
  - - "~>"
165
165
  - !ruby/object:Gem::Version
166
166
  version: 0.1.6
167
+ - !ruby/object:Gem::Dependency
168
+ name: replicate-ruby
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - ">="
172
+ - !ruby/object:Gem::Version
173
+ version: '0'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - ">="
179
+ - !ruby/object:Gem::Version
180
+ version: '0'
167
181
  - !ruby/object:Gem::Dependency
168
182
  name: qdrant-ruby
169
183
  requirement: !ruby/object:Gem::Requirement
@@ -250,6 +264,7 @@ files:
250
264
  - lib/llm/cohere.rb
251
265
  - lib/llm/hugging_face.rb
252
266
  - lib/llm/openai.rb
267
+ - lib/llm/replicate.rb
253
268
  - lib/loader.rb
254
269
  - lib/loaders/base.rb
255
270
  - lib/loaders/docx.rb