langchainrb 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.env.example +1 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +3 -3
- data/Gemfile.lock +21 -15
- data/README.md +19 -8
- data/examples/pdf_store_and_query_with_chroma.rb +6 -1
- data/lib/langchain.rb +1 -0
- data/lib/llm/base.rb +2 -1
- data/lib/llm/replicate.rb +88 -0
- data/lib/version.rb +1 -1
- metadata +29 -14
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
|
|
4
|
+
data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
|
|
7
|
+
data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
|
data/.env.example
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
|
@@ -11,6 +11,6 @@ gem "rspec", "~> 3.0"
|
|
|
11
11
|
|
|
12
12
|
gem "standardrb"
|
|
13
13
|
|
|
14
|
-
# TODO:
|
|
15
|
-
#
|
|
16
|
-
gem "
|
|
14
|
+
# TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
|
|
15
|
+
# Most likely everything will just need to be updated to `faraday 2.x`
|
|
16
|
+
gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
|
data/Gemfile.lock
CHANGED
|
@@ -1,16 +1,18 @@
|
|
|
1
1
|
GIT
|
|
2
|
-
remote: https://github.com/ruby
|
|
3
|
-
revision:
|
|
4
|
-
branch:
|
|
2
|
+
remote: https://github.com/andreibondarev/replicate-ruby.git
|
|
3
|
+
revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
|
|
4
|
+
branch: faraday-1.x
|
|
5
5
|
specs:
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
6
|
+
replicate-ruby (0.2.1)
|
|
7
|
+
addressable
|
|
8
|
+
faraday (>= 1.0)
|
|
9
|
+
faraday-multipart
|
|
10
|
+
faraday-retry
|
|
9
11
|
|
|
10
12
|
PATH
|
|
11
13
|
remote: .
|
|
12
14
|
specs:
|
|
13
|
-
langchainrb (0.3.9)
|
|
15
|
+
langchainrb (0.3.10)
|
|
14
16
|
|
|
15
17
|
GEM
|
|
16
18
|
remote: https://rubygems.org/
|
|
@@ -44,12 +46,15 @@ GEM
|
|
|
44
46
|
dry-monads (~> 1.6)
|
|
45
47
|
ruby-next-core (>= 0.15.0)
|
|
46
48
|
coderay (1.1.3)
|
|
47
|
-
cohere-ruby (0.9.
|
|
48
|
-
faraday (
|
|
49
|
-
faraday_middleware (
|
|
49
|
+
cohere-ruby (0.9.4)
|
|
50
|
+
faraday (>= 1.0.0)
|
|
51
|
+
faraday_middleware (>= 1.0.0)
|
|
50
52
|
concurrent-ruby (1.2.2)
|
|
51
53
|
crass (1.0.6)
|
|
52
54
|
diff-lcs (1.5.0)
|
|
55
|
+
docx (0.8.0)
|
|
56
|
+
nokogiri (~> 1.13, >= 1.13.0)
|
|
57
|
+
rubyzip (~> 2.0)
|
|
53
58
|
dotenv (2.7.6)
|
|
54
59
|
dotenv-rails (2.7.6)
|
|
55
60
|
dotenv (= 2.7.6)
|
|
@@ -136,8 +141,8 @@ GEM
|
|
|
136
141
|
httparty (0.21.0)
|
|
137
142
|
mini_mime (>= 1.0.0)
|
|
138
143
|
multi_xml (>= 0.5.2)
|
|
139
|
-
hugging-face (0.3.
|
|
140
|
-
faraday (
|
|
144
|
+
hugging-face (0.3.4)
|
|
145
|
+
faraday (>= 1.0)
|
|
141
146
|
i18n (1.13.0)
|
|
142
147
|
concurrent-ruby (~> 1.0)
|
|
143
148
|
ice_nine (0.11.2)
|
|
@@ -276,12 +281,12 @@ PLATFORMS
|
|
|
276
281
|
|
|
277
282
|
DEPENDENCIES
|
|
278
283
|
chroma-db (~> 0.3.0)
|
|
279
|
-
cohere-ruby (~> 0.9.
|
|
280
|
-
docx
|
|
284
|
+
cohere-ruby (~> 0.9.4)
|
|
285
|
+
docx (~> 0.8.0)
|
|
281
286
|
dotenv-rails (~> 2.7.6)
|
|
282
287
|
eqn (~> 1.6.5)
|
|
283
288
|
google_search_results (~> 2.0.0)
|
|
284
|
-
hugging-face (~> 0.3.
|
|
289
|
+
hugging-face (~> 0.3.4)
|
|
285
290
|
langchainrb!
|
|
286
291
|
milvus (~> 0.9.0)
|
|
287
292
|
pdf-reader (~> 1.4)
|
|
@@ -289,6 +294,7 @@ DEPENDENCIES
|
|
|
289
294
|
pry-byebug (~> 3.10.0)
|
|
290
295
|
qdrant-ruby (~> 0.9.0)
|
|
291
296
|
rake (~> 13.0)
|
|
297
|
+
replicate-ruby!
|
|
292
298
|
rspec (~> 3.0)
|
|
293
299
|
ruby-openai (~> 4.0.0)
|
|
294
300
|
standardrb
|
data/README.md
CHANGED
|
@@ -28,13 +28,13 @@ require "langchain"
|
|
|
28
28
|
|
|
29
29
|
#### Supported vector search databases and features:
|
|
30
30
|
|
|
31
|
-
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
|
32
|
-
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
|
33
|
-
| Chroma
|
|
34
|
-
| Milvus
|
|
35
|
-
| Pinecone | :white_check_mark: |
|
|
36
|
-
| Qdrant
|
|
37
|
-
| Weaviate | :white_check_mark: |
|
|
31
|
+
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
|
32
|
+
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
|
33
|
+
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
34
|
+
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
35
|
+
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
36
|
+
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
37
|
+
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
38
38
|
|
|
39
39
|
### Using Vector Search Databases 🔍
|
|
40
40
|
|
|
@@ -76,8 +76,9 @@ client.add_texts(
|
|
|
76
76
|
# Store the contents of your files in your vector search database
|
|
77
77
|
my_pdf = Langchain.root.join("path/to/my.pdf")
|
|
78
78
|
my_text = Langchain.root.join("path/to/my.txt")
|
|
79
|
+
my_docx = Langchain.root.join("path/to/my.docx")
|
|
79
80
|
|
|
80
|
-
client.add_data(paths: [my_pdf, my_text])
|
|
81
|
+
client.add_data(paths: [my_pdf, my_text, my_docx])
|
|
81
82
|
```
|
|
82
83
|
```ruby
|
|
83
84
|
# Retrieve similar documents based on the query string passed in
|
|
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
|
|
|
130
131
|
|
|
131
132
|
#### HuggingFace
|
|
132
133
|
Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
|
|
134
|
+
```ruby
|
|
135
|
+
hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
#### Replicate
|
|
139
|
+
Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
|
|
140
|
+
```ruby
|
|
141
|
+
replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
|
142
|
+
```
|
|
133
143
|
|
|
134
144
|
### Using Prompts 📋
|
|
135
145
|
|
|
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
|
|
|
254
264
|
|
|
255
265
|
| Name | Class | Gem Requirements |
|
|
256
266
|
| ---- | ------------- | :--------------------------: |
|
|
267
|
+
| docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
|
|
257
268
|
| pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
|
|
258
269
|
| text | Loaders::Text | |
|
|
259
270
|
|
|
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
|
|
|
14
14
|
# Create the default schema.
|
|
15
15
|
chroma.create_default_schema
|
|
16
16
|
|
|
17
|
+
# gem install these or add them to your Gemfile
|
|
18
|
+
# Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
|
|
19
|
+
# Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
|
|
20
|
+
|
|
17
21
|
# Set up an array of PDF, TXT and DOCX documents
|
|
18
22
|
docs = [
|
|
19
23
|
Langchain.root.join("/docs/document.pdf"),
|
|
20
|
-
Langchain.root.join("/docs/document.txt")
|
|
24
|
+
Langchain.root.join("/docs/document.txt"),
|
|
25
|
+
Langchain.root.join("/docs/document.docx")
|
|
21
26
|
]
|
|
22
27
|
|
|
23
28
|
# Add data to the index. Chroma will use OpenAI to generate embeddings behind the scenes.
|
data/lib/langchain.rb
CHANGED
data/lib/llm/base.rb
CHANGED
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module LLM
|
|
4
|
+
class Replicate < Base
|
|
5
|
+
# Wrapper around Replicate.com LLM provider
|
|
6
|
+
# Use it directly:
|
|
7
|
+
# replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
|
8
|
+
#
|
|
9
|
+
# Or pass it to be instantiated by a vector search DB:
|
|
10
|
+
# chroma = Vectorsearch::Chroma.new(
|
|
11
|
+
# url: ENV["CHROMA_URL"],
|
|
12
|
+
# index_name: "...",
|
|
13
|
+
# llm: :replicate,
|
|
14
|
+
# llm_api_key: ENV["REPLICATE_API_KEY"],
|
|
15
|
+
# )
|
|
16
|
+
|
|
17
|
+
DEFAULTS = {
|
|
18
|
+
# TODO: Figure out how to send the temperature to the API
|
|
19
|
+
temperature: 0.01, # Minimum accepted value
|
|
20
|
+
# TODO: Design the interface to pass and use different models
|
|
21
|
+
completion_model_name: "replicate/vicuna-13b",
|
|
22
|
+
embeddings_model_name: "creatorrr/all-mpnet-base-v2",
|
|
23
|
+
dimension: 384
|
|
24
|
+
}.freeze
|
|
25
|
+
|
|
26
|
+
# Initialize the Replicate LLM
|
|
27
|
+
# @param api_key [String] The API key to use
|
|
28
|
+
def initialize(api_key:)
|
|
29
|
+
depends_on "replicate-ruby"
|
|
30
|
+
require "replicate"
|
|
31
|
+
|
|
32
|
+
::Replicate.configure do |config|
|
|
33
|
+
config.api_token = api_key
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
@client = ::Replicate.client
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
# Generate an embedding for a given text
|
|
40
|
+
# @param text [String] The text to generate an embedding for
|
|
41
|
+
# @return [Hash] The embedding
|
|
42
|
+
def embed(text:)
|
|
43
|
+
response = embeddings_model.predict(input: text)
|
|
44
|
+
|
|
45
|
+
until response.finished?
|
|
46
|
+
response.refetch
|
|
47
|
+
sleep(1)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
response.output
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Generate a completion for a given prompt
|
|
54
|
+
# @param prompt [String] The prompt to generate a completion for
|
|
55
|
+
# @return [Hash] The completion
|
|
56
|
+
def complete(prompt:, **params)
|
|
57
|
+
response = completion_model.predict(prompt: prompt)
|
|
58
|
+
|
|
59
|
+
until response.finished?
|
|
60
|
+
response.refetch
|
|
61
|
+
sleep(1)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
|
|
65
|
+
# The first array element is missing a space at the end, so we add it manually
|
|
66
|
+
response.output[0] += " "
|
|
67
|
+
|
|
68
|
+
response.output.join
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# Replicate does not have a dedicated chat endpoint, so instead we call `complete()`
|
|
72
|
+
def chat(...)
|
|
73
|
+
complete(...)
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
alias_method :generate_embedding, :embed
|
|
77
|
+
|
|
78
|
+
private
|
|
79
|
+
|
|
80
|
+
def completion_model
|
|
81
|
+
@completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def embeddings_model
|
|
85
|
+
@embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: langchainrb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.9
|
|
4
|
+
version: 0.3.10
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrei Bondarev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-05-
|
|
11
|
+
date: 2023-05-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: dotenv-rails
|
|
@@ -39,47 +39,47 @@ dependencies:
|
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
40
|
version: 3.10.0
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
|
-
name:
|
|
42
|
+
name: chroma-db
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
44
44
|
requirements:
|
|
45
45
|
- - "~>"
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version: 0.
|
|
47
|
+
version: 0.3.0
|
|
48
48
|
type: :development
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - "~>"
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version: 0.
|
|
54
|
+
version: 0.3.0
|
|
55
55
|
- !ruby/object:Gem::Dependency
|
|
56
|
-
name:
|
|
56
|
+
name: cohere-ruby
|
|
57
57
|
requirement: !ruby/object:Gem::Requirement
|
|
58
58
|
requirements:
|
|
59
59
|
- - "~>"
|
|
60
60
|
- !ruby/object:Gem::Version
|
|
61
|
-
version: 0.
|
|
61
|
+
version: 0.9.4
|
|
62
62
|
type: :development
|
|
63
63
|
prerelease: false
|
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
|
65
65
|
requirements:
|
|
66
66
|
- - "~>"
|
|
67
67
|
- !ruby/object:Gem::Version
|
|
68
|
-
version: 0.
|
|
68
|
+
version: 0.9.4
|
|
69
69
|
- !ruby/object:Gem::Dependency
|
|
70
70
|
name: docx
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
|
-
- - "
|
|
73
|
+
- - "~>"
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version:
|
|
75
|
+
version: 0.8.0
|
|
76
76
|
type: :development
|
|
77
77
|
prerelease: false
|
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
79
|
requirements:
|
|
80
|
-
- - "
|
|
80
|
+
- - "~>"
|
|
81
81
|
- !ruby/object:Gem::Version
|
|
82
|
-
version:
|
|
82
|
+
version: 0.8.0
|
|
83
83
|
- !ruby/object:Gem::Dependency
|
|
84
84
|
name: eqn
|
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -114,14 +114,14 @@ dependencies:
|
|
|
114
114
|
requirements:
|
|
115
115
|
- - "~>"
|
|
116
116
|
- !ruby/object:Gem::Version
|
|
117
|
-
version: 0.3.
|
|
117
|
+
version: 0.3.4
|
|
118
118
|
type: :development
|
|
119
119
|
prerelease: false
|
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
|
121
121
|
requirements:
|
|
122
122
|
- - "~>"
|
|
123
123
|
- !ruby/object:Gem::Version
|
|
124
|
-
version: 0.3.
|
|
124
|
+
version: 0.3.4
|
|
125
125
|
- !ruby/object:Gem::Dependency
|
|
126
126
|
name: milvus
|
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -164,6 +164,20 @@ dependencies:
|
|
|
164
164
|
- - "~>"
|
|
165
165
|
- !ruby/object:Gem::Version
|
|
166
166
|
version: 0.1.6
|
|
167
|
+
- !ruby/object:Gem::Dependency
|
|
168
|
+
name: replicate-ruby
|
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
|
170
|
+
requirements:
|
|
171
|
+
- - ">="
|
|
172
|
+
- !ruby/object:Gem::Version
|
|
173
|
+
version: '0'
|
|
174
|
+
type: :development
|
|
175
|
+
prerelease: false
|
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
177
|
+
requirements:
|
|
178
|
+
- - ">="
|
|
179
|
+
- !ruby/object:Gem::Version
|
|
180
|
+
version: '0'
|
|
167
181
|
- !ruby/object:Gem::Dependency
|
|
168
182
|
name: qdrant-ruby
|
|
169
183
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -250,6 +264,7 @@ files:
|
|
|
250
264
|
- lib/llm/cohere.rb
|
|
251
265
|
- lib/llm/hugging_face.rb
|
|
252
266
|
- lib/llm/openai.rb
|
|
267
|
+
- lib/llm/replicate.rb
|
|
253
268
|
- lib/loader.rb
|
|
254
269
|
- lib/loaders/base.rb
|
|
255
270
|
- lib/loaders/docx.rb
|