langchainrb 0.3.9 → 0.3.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env.example +1 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +3 -3
- data/Gemfile.lock +21 -15
- data/README.md +19 -8
- data/examples/pdf_store_and_query_with_chroma.rb +6 -1
- data/lib/langchain.rb +1 -0
- data/lib/llm/base.rb +2 -1
- data/lib/llm/replicate.rb +88 -0
- data/lib/version.rb +1 -1
- metadata +29 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
|
4
|
+
data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
|
7
|
+
data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
|
data/.env.example
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -11,6 +11,6 @@ gem "rspec", "~> 3.0"
|
|
11
11
|
|
12
12
|
gem "standardrb"
|
13
13
|
|
14
|
-
# TODO:
|
15
|
-
#
|
16
|
-
gem "
|
14
|
+
# TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
|
15
|
+
# Most likely everything will just need to be updated to `faraday 2.x`
|
16
|
+
gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
|
data/Gemfile.lock
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
GIT
|
2
|
-
remote: https://github.com/ruby
|
3
|
-
revision:
|
4
|
-
branch:
|
2
|
+
remote: https://github.com/andreibondarev/replicate-ruby.git
|
3
|
+
revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
|
4
|
+
branch: faraday-1.x
|
5
5
|
specs:
|
6
|
-
|
7
|
-
|
8
|
-
|
6
|
+
replicate-ruby (0.2.1)
|
7
|
+
addressable
|
8
|
+
faraday (>= 1.0)
|
9
|
+
faraday-multipart
|
10
|
+
faraday-retry
|
9
11
|
|
10
12
|
PATH
|
11
13
|
remote: .
|
12
14
|
specs:
|
13
|
-
langchainrb (0.3.9)
|
15
|
+
langchainrb (0.3.10)
|
14
16
|
|
15
17
|
GEM
|
16
18
|
remote: https://rubygems.org/
|
@@ -44,12 +46,15 @@ GEM
|
|
44
46
|
dry-monads (~> 1.6)
|
45
47
|
ruby-next-core (>= 0.15.0)
|
46
48
|
coderay (1.1.3)
|
47
|
-
cohere-ruby (0.9.
|
48
|
-
faraday (
|
49
|
-
faraday_middleware (
|
49
|
+
cohere-ruby (0.9.4)
|
50
|
+
faraday (>= 1.0.0)
|
51
|
+
faraday_middleware (>= 1.0.0)
|
50
52
|
concurrent-ruby (1.2.2)
|
51
53
|
crass (1.0.6)
|
52
54
|
diff-lcs (1.5.0)
|
55
|
+
docx (0.8.0)
|
56
|
+
nokogiri (~> 1.13, >= 1.13.0)
|
57
|
+
rubyzip (~> 2.0)
|
53
58
|
dotenv (2.7.6)
|
54
59
|
dotenv-rails (2.7.6)
|
55
60
|
dotenv (= 2.7.6)
|
@@ -136,8 +141,8 @@ GEM
|
|
136
141
|
httparty (0.21.0)
|
137
142
|
mini_mime (>= 1.0.0)
|
138
143
|
multi_xml (>= 0.5.2)
|
139
|
-
hugging-face (0.3.
|
140
|
-
faraday (
|
144
|
+
hugging-face (0.3.4)
|
145
|
+
faraday (>= 1.0)
|
141
146
|
i18n (1.13.0)
|
142
147
|
concurrent-ruby (~> 1.0)
|
143
148
|
ice_nine (0.11.2)
|
@@ -276,12 +281,12 @@ PLATFORMS
|
|
276
281
|
|
277
282
|
DEPENDENCIES
|
278
283
|
chroma-db (~> 0.3.0)
|
279
|
-
cohere-ruby (~> 0.9.
|
280
|
-
docx
|
284
|
+
cohere-ruby (~> 0.9.4)
|
285
|
+
docx (~> 0.8.0)
|
281
286
|
dotenv-rails (~> 2.7.6)
|
282
287
|
eqn (~> 1.6.5)
|
283
288
|
google_search_results (~> 2.0.0)
|
284
|
-
hugging-face (~> 0.3.
|
289
|
+
hugging-face (~> 0.3.4)
|
285
290
|
langchainrb!
|
286
291
|
milvus (~> 0.9.0)
|
287
292
|
pdf-reader (~> 1.4)
|
@@ -289,6 +294,7 @@ DEPENDENCIES
|
|
289
294
|
pry-byebug (~> 3.10.0)
|
290
295
|
qdrant-ruby (~> 0.9.0)
|
291
296
|
rake (~> 13.0)
|
297
|
+
replicate-ruby!
|
292
298
|
rspec (~> 3.0)
|
293
299
|
ruby-openai (~> 4.0.0)
|
294
300
|
standardrb
|
data/README.md
CHANGED
@@ -28,13 +28,13 @@ require "langchain"
|
|
28
28
|
|
29
29
|
#### Supported vector search databases and features:
|
30
30
|
|
31
|
-
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
32
|
-
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
33
|
-
| Chroma
|
34
|
-
| Milvus
|
35
|
-
| Pinecone | :white_check_mark: |
|
36
|
-
| Qdrant
|
37
|
-
| Weaviate | :white_check_mark: |
|
31
|
+
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
32
|
+
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
33
|
+
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
34
|
+
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
35
|
+
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
36
|
+
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
37
|
+
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
38
38
|
|
39
39
|
### Using Vector Search Databases 🔍
|
40
40
|
|
@@ -76,8 +76,9 @@ client.add_texts(
|
|
76
76
|
# Store the contents of your files in your vector search database
|
77
77
|
my_pdf = Langchain.root.join("path/to/my.pdf")
|
78
78
|
my_text = Langchain.root.join("path/to/my.txt")
|
79
|
+
my_docx = Langchain.root.join("path/to/my.docx")
|
79
80
|
|
80
|
-
client.add_data(paths: [my_pdf, my_text])
|
81
|
+
client.add_data(paths: [my_pdf, my_text, my_docx])
|
81
82
|
```
|
82
83
|
```ruby
|
83
84
|
# Retrieve similar documents based on the query string passed in
|
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
|
|
130
131
|
|
131
132
|
#### HuggingFace
|
132
133
|
Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
|
134
|
+
```ruby
|
135
|
+
hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
|
136
|
+
```
|
137
|
+
|
138
|
+
#### Replicate
|
139
|
+
Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
|
140
|
+
```ruby
|
141
|
+
replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
142
|
+
```
|
133
143
|
|
134
144
|
### Using Prompts 📋
|
135
145
|
|
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
|
|
254
264
|
|
255
265
|
| Name | Class | Gem Requirements |
|
256
266
|
| ---- | ------------- | :--------------------------: |
|
267
|
+
| docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
|
257
268
|
| pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
|
258
269
|
| text | Loaders::Text | |
|
259
270
|
|
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
|
|
14
14
|
# Create the default schema.
|
15
15
|
chroma.create_default_schema
|
16
16
|
|
17
|
+
# gem install these or add them to your Gemfile
|
18
|
+
# Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
|
19
|
+
# Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
|
20
|
+
|
17
21
|
# Set up an array of PDF, TXT and DOCX documents
|
18
22
|
docs = [
|
19
23
|
Langchain.root.join("/docs/document.pdf"),
|
20
|
-
Langchain.root.join("/docs/document.txt")
|
24
|
+
Langchain.root.join("/docs/document.txt"),
|
25
|
+
Langchain.root.join("/docs/document.docx")
|
21
26
|
]
|
22
27
|
|
23
28
|
# Add data to the index. Chroma will use OpenAI to generate embeddings behind the scenes.
|
data/lib/langchain.rb
CHANGED
data/lib/llm/base.rb
CHANGED
@@ -0,0 +1,88 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LLM
|
4
|
+
class Replicate < Base
|
5
|
+
# Wrapper around Replicate.com LLM provider
|
6
|
+
# Use it directly:
|
7
|
+
# replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
8
|
+
#
|
9
|
+
# Or pass it to be instantiated by a vector search DB:
|
10
|
+
# chroma = Vectorsearch::Chroma.new(
|
11
|
+
# url: ENV["CHROMA_URL"],
|
12
|
+
# index_name: "...",
|
13
|
+
# llm: :replicate,
|
14
|
+
# llm_api_key: ENV["REPLICATE_API_KEY"],
|
15
|
+
# )
|
16
|
+
|
17
|
+
DEFAULTS = {
|
18
|
+
# TODO: Figure out how to send the temperature to the API
|
19
|
+
temperature: 0.01, # Minimum accepted value
|
20
|
+
# TODO: Design the interface to pass and use different models
|
21
|
+
completion_model_name: "replicate/vicuna-13b",
|
22
|
+
embeddings_model_name: "creatorrr/all-mpnet-base-v2",
|
23
|
+
dimension: 384
|
24
|
+
}.freeze
|
25
|
+
|
26
|
+
# Initialize the Replicate LLM
|
27
|
+
# @param api_key [String] The API key to use
|
28
|
+
def initialize(api_key:)
|
29
|
+
depends_on "replicate-ruby"
|
30
|
+
require "replicate"
|
31
|
+
|
32
|
+
::Replicate.configure do |config|
|
33
|
+
config.api_token = api_key
|
34
|
+
end
|
35
|
+
|
36
|
+
@client = ::Replicate.client
|
37
|
+
end
|
38
|
+
|
39
|
+
# Generate an embedding for a given text
|
40
|
+
# @param text [String] The text to generate an embedding for
|
41
|
+
# @return [Hash] The embedding
|
42
|
+
def embed(text:)
|
43
|
+
response = embeddings_model.predict(input: text)
|
44
|
+
|
45
|
+
until response.finished?
|
46
|
+
response.refetch
|
47
|
+
sleep(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
response.output
|
51
|
+
end
|
52
|
+
|
53
|
+
# Generate a completion for a given prompt
|
54
|
+
# @param prompt [String] The prompt to generate a completion for
|
55
|
+
# @return [String] The completion
|
56
|
+
def complete(prompt:, **params)
|
57
|
+
response = completion_model.predict(prompt: prompt)
|
58
|
+
|
59
|
+
until response.finished?
|
60
|
+
response.refetch
|
61
|
+
sleep(1)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
|
65
|
+
# The first array element is missing a space at the end, so we add it manually
|
66
|
+
response.output[0] += " "
|
67
|
+
|
68
|
+
response.output.join
|
69
|
+
end
|
70
|
+
|
71
|
+
# Replicate does not have a dedicated chat endpoint, so instead we call `complete()`
|
72
|
+
def chat(...)
|
73
|
+
complete(...)
|
74
|
+
end
|
75
|
+
|
76
|
+
alias_method :generate_embedding, :embed
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def completion_model
|
81
|
+
@completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
|
82
|
+
end
|
83
|
+
|
84
|
+
def embeddings_model
|
85
|
+
@embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.9
|
4
|
+
version: 0.3.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dotenv-rails
|
@@ -39,47 +39,47 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 3.10.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: chroma-db
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 0.3.0
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 0.3.0
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: cohere-ruby
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
61
|
+
version: 0.9.4
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
68
|
+
version: 0.9.4
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: docx
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 0.8.0
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
82
|
+
version: 0.8.0
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: eqn
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,14 +114,14 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.3.
|
117
|
+
version: 0.3.4
|
118
118
|
type: :development
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0.3.
|
124
|
+
version: 0.3.4
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: milvus
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -164,6 +164,20 @@ dependencies:
|
|
164
164
|
- - "~>"
|
165
165
|
- !ruby/object:Gem::Version
|
166
166
|
version: 0.1.6
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: replicate-ruby
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
167
181
|
- !ruby/object:Gem::Dependency
|
168
182
|
name: qdrant-ruby
|
169
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -250,6 +264,7 @@ files:
|
|
250
264
|
- lib/llm/cohere.rb
|
251
265
|
- lib/llm/hugging_face.rb
|
252
266
|
- lib/llm/openai.rb
|
267
|
+
- lib/llm/replicate.rb
|
253
268
|
- lib/loader.rb
|
254
269
|
- lib/loaders/base.rb
|
255
270
|
- lib/loaders/docx.rb
|