langchainrb 0.3.8 → 0.3.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env.example +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +25 -8
- data/README.md +19 -8
- data/examples/pdf_store_and_query_with_chroma.rb +6 -1
- data/lib/langchain.rb +3 -1
- data/lib/llm/base.rb +2 -1
- data/lib/llm/replicate.rb +88 -0
- data/lib/loaders/base.rb +2 -0
- data/lib/loaders/docx.rb +34 -0
- data/lib/loaders/pdf.rb +2 -0
- data/lib/loaders/text.rb +2 -0
- data/lib/version.rb +1 -1
- metadata +39 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7537d1ae285b9052051f58cfa43d1d79f9fbcf7590651b3e9a9742495aa9d58a
|
4
|
+
data.tar.gz: 9ec416a4c257c3218f065ec0d42f9a467eb0298961e6d144ec3f642949e4e087
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2919f1aea592d394555b9d1b87e533f112ec36a651c41fa5ff2875741bc581f9d545936e5a24b63e89dbe122d289c1fd15c0f3c33c075b3c7cfd2fdd60e9c75f
|
7
|
+
data.tar.gz: f399765255e33aa215e2ef15bb768f73eb8f04ba279b7c0e2bd2c66cacd61bdc1fd537e7f9e6f140e7107a3d5696ef8cc37e1d2d6d330b1b619f1aacada589f9
|
data/.env.example
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.3.10] - 2023-05-19
|
4
|
+
- 🗣️ LLMs
|
5
|
+
- Introducing support for Replicate.com
|
6
|
+
|
7
|
+
## [0.3.9] - 2023-05-19
|
8
|
+
- 🚚 Loaders
|
9
|
+
- Introduce `Loaders::Docx` to parse .docx files
|
10
|
+
|
3
11
|
## [0.3.8] - 2023-05-19
|
4
12
|
- 🔍 Vectorsearch
|
5
13
|
- Introduce support for Chroma DB
|
data/Gemfile
CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
|
|
10
10
|
gem "rspec", "~> 3.0"
|
11
11
|
|
12
12
|
gem "standardrb"
|
13
|
+
|
14
|
+
# TODO: Fix this `faraday` issue where some gems are using 1.x and others are using 2.x
|
15
|
+
# Most likely everything will just need to be updated to `faraday 2.x`
|
16
|
+
gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,18 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/andreibondarev/replicate-ruby.git
|
3
|
+
revision: 6276dec02ce33ae68a57cdd362eb8e87ed29c8e6
|
4
|
+
branch: faraday-1.x
|
5
|
+
specs:
|
6
|
+
replicate-ruby (0.2.1)
|
7
|
+
addressable
|
8
|
+
faraday (>= 1.0)
|
9
|
+
faraday-multipart
|
10
|
+
faraday-retry
|
11
|
+
|
1
12
|
PATH
|
2
13
|
remote: .
|
3
14
|
specs:
|
4
|
-
langchainrb (0.3.8)
|
15
|
+
langchainrb (0.3.10)
|
5
16
|
|
6
17
|
GEM
|
7
18
|
remote: https://rubygems.org/
|
@@ -35,12 +46,15 @@ GEM
|
|
35
46
|
dry-monads (~> 1.6)
|
36
47
|
ruby-next-core (>= 0.15.0)
|
37
48
|
coderay (1.1.3)
|
38
|
-
cohere-ruby (0.9.
|
39
|
-
faraday (
|
40
|
-
faraday_middleware (
|
49
|
+
cohere-ruby (0.9.4)
|
50
|
+
faraday (>= 1.0.0)
|
51
|
+
faraday_middleware (>= 1.0.0)
|
41
52
|
concurrent-ruby (1.2.2)
|
42
53
|
crass (1.0.6)
|
43
54
|
diff-lcs (1.5.0)
|
55
|
+
docx (0.8.0)
|
56
|
+
nokogiri (~> 1.13, >= 1.13.0)
|
57
|
+
rubyzip (~> 2.0)
|
44
58
|
dotenv (2.7.6)
|
45
59
|
dotenv-rails (2.7.6)
|
46
60
|
dotenv (= 2.7.6)
|
@@ -127,8 +141,8 @@ GEM
|
|
127
141
|
httparty (0.21.0)
|
128
142
|
mini_mime (>= 1.0.0)
|
129
143
|
multi_xml (>= 0.5.2)
|
130
|
-
hugging-face (0.3.
|
131
|
-
faraday (
|
144
|
+
hugging-face (0.3.4)
|
145
|
+
faraday (>= 1.0)
|
132
146
|
i18n (1.13.0)
|
133
147
|
concurrent-ruby (~> 1.0)
|
134
148
|
ice_nine (0.11.2)
|
@@ -230,6 +244,7 @@ GEM
|
|
230
244
|
ruby-progressbar (1.13.0)
|
231
245
|
ruby-rc4 (0.1.5)
|
232
246
|
ruby2_keywords (0.0.5)
|
247
|
+
rubyzip (2.3.2)
|
233
248
|
standard (1.28.2)
|
234
249
|
language_server-protocol (~> 3.17.0.2)
|
235
250
|
lint_roller (~> 1.0)
|
@@ -266,11 +281,12 @@ PLATFORMS
|
|
266
281
|
|
267
282
|
DEPENDENCIES
|
268
283
|
chroma-db (~> 0.3.0)
|
269
|
-
cohere-ruby (~> 0.9.
|
284
|
+
cohere-ruby (~> 0.9.4)
|
285
|
+
docx (~> 0.8.0)
|
270
286
|
dotenv-rails (~> 2.7.6)
|
271
287
|
eqn (~> 1.6.5)
|
272
288
|
google_search_results (~> 2.0.0)
|
273
|
-
hugging-face (~> 0.3.
|
289
|
+
hugging-face (~> 0.3.4)
|
274
290
|
langchainrb!
|
275
291
|
milvus (~> 0.9.0)
|
276
292
|
pdf-reader (~> 1.4)
|
@@ -278,6 +294,7 @@ DEPENDENCIES
|
|
278
294
|
pry-byebug (~> 3.10.0)
|
279
295
|
qdrant-ruby (~> 0.9.0)
|
280
296
|
rake (~> 13.0)
|
297
|
+
replicate-ruby!
|
281
298
|
rspec (~> 3.0)
|
282
299
|
ruby-openai (~> 4.0.0)
|
283
300
|
standardrb
|
data/README.md
CHANGED
@@ -28,13 +28,13 @@ require "langchain"
|
|
28
28
|
|
29
29
|
#### Supported vector search databases and features:
|
30
30
|
|
31
|
-
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
32
|
-
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
33
|
-
| Chroma
|
34
|
-
| Milvus
|
35
|
-
| Pinecone | :white_check_mark: |
|
36
|
-
| Qdrant
|
37
|
-
| Weaviate | :white_check_mark: |
|
31
|
+
| Database | Querying | Storage | Schema Management | Backups | Rails Integration |
|
32
|
+
| -------- |:------------------:| -------:| -----------------:| -------:| -----------------:|
|
33
|
+
| [Chroma](https://trychroma.com/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
34
|
+
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
35
|
+
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
36
|
+
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
37
|
+
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
38
38
|
|
39
39
|
### Using Vector Search Databases 🔍
|
40
40
|
|
@@ -76,8 +76,9 @@ client.add_texts(
|
|
76
76
|
# Store the contents of your files in your vector search database
|
77
77
|
my_pdf = Langchain.root.join("path/to/my.pdf")
|
78
78
|
my_text = Langchain.root.join("path/to/my.txt")
|
79
|
+
my_docx = Langchain.root.join("path/to/my.docx")
|
79
80
|
|
80
|
-
client.add_data(paths: [my_pdf, my_text])
|
81
|
+
client.add_data(paths: [my_pdf, my_text, my_docx])
|
81
82
|
```
|
82
83
|
```ruby
|
83
84
|
# Retrieve similar documents based on the query string passed in
|
@@ -130,6 +131,15 @@ cohere.complete(prompt: "What is the meaning of life?")
|
|
130
131
|
|
131
132
|
#### HuggingFace
|
132
133
|
Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
|
134
|
+
```ruby
|
135
|
+
hugging_face = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
|
136
|
+
```
|
137
|
+
|
138
|
+
#### Replicate
|
139
|
+
Add `gem "replicate-ruby", git: "https://github.com/andreibondarev/replicate-ruby.git", branch: "faraday-1.x"` to your Gemfile.
|
140
|
+
```ruby
|
141
|
+
replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
142
|
+
```
|
133
143
|
|
134
144
|
### Using Prompts 📋
|
135
145
|
|
@@ -254,6 +264,7 @@ Need to read data from various sources? Load it up.
|
|
254
264
|
|
255
265
|
| Name | Class | Gem Requirements |
|
256
266
|
| ---- | ------------- | :--------------------------: |
|
267
|
+
| docx | Loaders::Docx | `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` |
|
257
268
|
| pdf | Loaders::PDF | `gem "pdf-reader", "~> 1.4"` |
|
258
269
|
| text | Loaders::Text | |
|
259
270
|
|
data/examples/pdf_store_and_query_with_chroma.rb
CHANGED
@@ -14,10 +14,15 @@ chroma = Vectorsearch::Chroma.new(
|
|
14
14
|
# Create the default schema.
|
15
15
|
chroma.create_default_schema
|
16
16
|
|
17
|
+
# gem install these or add them to your Gemfile
|
18
|
+
# Add `gem "pdf-reader", "~> 1.4"` to your Gemfile
|
19
|
+
# Add `gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"` to your Gemfile
|
20
|
+
|
17
21
|
# Set up an array of PDF, TXT and DOCX documents
|
18
22
|
docs = [
|
19
23
|
Langchain.root.join("/docs/document.pdf"),
|
20
|
-
Langchain.root.join("/docs/document.txt")
|
24
|
+
Langchain.root.join("/docs/document.txt"),
|
25
|
+
Langchain.root.join("/docs/document.docx")
|
21
26
|
]
|
22
27
|
|
23
28
|
# Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scene.
|
data/lib/langchain.rb
CHANGED
@@ -36,6 +36,7 @@ module LLM
|
|
36
36
|
autoload :Cohere, "llm/cohere"
|
37
37
|
autoload :HuggingFace, "llm/hugging_face"
|
38
38
|
autoload :OpenAI, "llm/openai"
|
39
|
+
autoload :Replicate, "llm/replicate"
|
39
40
|
end
|
40
41
|
|
41
42
|
module Prompt
|
@@ -55,6 +56,7 @@ end
|
|
55
56
|
|
56
57
|
module Loaders
|
57
58
|
autoload :Base, "loaders/base"
|
59
|
+
autoload :Docx, "loaders/docx"
|
58
60
|
autoload :PDF, "loaders/pdf"
|
59
61
|
autoload :Text, "loaders/text"
|
60
62
|
end
|
@@ -62,4 +64,4 @@ end
|
|
62
64
|
autoload :Loader, "loader"
|
63
65
|
|
64
66
|
# Load the default Loaders
|
65
|
-
Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
|
67
|
+
Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
|
data/lib/llm/base.rb
CHANGED
data/lib/llm/replicate.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module LLM
|
4
|
+
class Replicate < Base
|
5
|
+
# Wrapper around Replicate.com LLM provider
|
6
|
+
# Use it directly:
|
7
|
+
# replicate = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
8
|
+
#
|
9
|
+
# Or pass it to be instantiated by a vector search DB:
|
10
|
+
# chroma = Vectorsearch::Chroma.new(
|
11
|
+
# url: ENV["CHROMA_URL"],
|
12
|
+
# index_name: "...",
|
13
|
+
# llm: :replicate,
|
14
|
+
# llm_api_key: ENV["REPLICATE_API_KEY"],
|
15
|
+
# )
|
16
|
+
|
17
|
+
DEFAULTS = {
|
18
|
+
# TODO: Figure out how to send the temperature to the API
|
19
|
+
temperature: 0.01, # Minimum accepted value
|
20
|
+
# TODO: Design the interface to pass and use different models
|
21
|
+
completion_model_name: "replicate/vicuna-13b",
|
22
|
+
embeddings_model_name: "creatorrr/all-mpnet-base-v2",
|
23
|
+
dimension: 384
|
24
|
+
}.freeze
|
25
|
+
|
26
|
+
# Initialize the Replicate LLM
|
27
|
+
# @param api_key [String] The API key to use
|
28
|
+
def initialize(api_key:)
|
29
|
+
depends_on "replicate-ruby"
|
30
|
+
require "replicate"
|
31
|
+
|
32
|
+
::Replicate.configure do |config|
|
33
|
+
config.api_token = api_key
|
34
|
+
end
|
35
|
+
|
36
|
+
@client = ::Replicate.client
|
37
|
+
end
|
38
|
+
|
39
|
+
# Generate an embedding for a given text
|
40
|
+
# @param text [String] The text to generate an embedding for
|
41
|
+
# @return [Hash] The embedding
|
42
|
+
def embed(text:)
|
43
|
+
response = embeddings_model.predict(input: text)
|
44
|
+
|
45
|
+
until response.finished?
|
46
|
+
response.refetch
|
47
|
+
sleep(1)
|
48
|
+
end
|
49
|
+
|
50
|
+
response.output
|
51
|
+
end
|
52
|
+
|
53
|
+
# Generate a completion for a given prompt
|
54
|
+
# @param prompt [String] The prompt to generate a completion for
|
55
|
+
# @return [String] The completion
|
56
|
+
def complete(prompt:, **params)
|
57
|
+
response = completion_model.predict(prompt: prompt)
|
58
|
+
|
59
|
+
until response.finished?
|
60
|
+
response.refetch
|
61
|
+
sleep(1)
|
62
|
+
end
|
63
|
+
|
64
|
+
# Response comes back as an array of strings, e.g.: ["Hi", "how ", "are ", "you?"]
|
65
|
+
# The first array element is missing a space at the end, so we add it manually
|
66
|
+
response.output[0] += " "
|
67
|
+
|
68
|
+
response.output.join
|
69
|
+
end
|
70
|
+
|
71
|
+
# No dedicated chat endpoint is used here, so `chat` simply delegates to `complete()`
|
72
|
+
def chat(...)
|
73
|
+
complete(...)
|
74
|
+
end
|
75
|
+
|
76
|
+
alias_method :generate_embedding, :embed
|
77
|
+
|
78
|
+
private
|
79
|
+
|
80
|
+
def completion_model
|
81
|
+
@completion_model ||= client.retrieve_model(DEFAULTS[:completion_model_name]).latest_version
|
82
|
+
end
|
83
|
+
|
84
|
+
def embeddings_model
|
85
|
+
@embeddings_model ||= client.retrieve_model(DEFAULTS[:embeddings_model_name]).latest_version
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
data/lib/loaders/base.rb
CHANGED
data/lib/loaders/docx.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Loaders
|
4
|
+
class Docx < Base
|
5
|
+
#
|
6
|
+
# This Loader parses Docx files into text.
|
7
|
+
# If you'd like to use it directly you can do so like this:
|
8
|
+
# Loaders::Docx.new("path/to/my.docx").load
|
9
|
+
#
|
10
|
+
# This parser is also invoked when you're adding data to a Vectorsearch DB:
|
11
|
+
# qdrant = Vectorsearch::Qdrant.new(...)
|
12
|
+
# path = Langchain.root.join("path/to/my.docx")
|
13
|
+
# qdrant.add_data(path: path)
|
14
|
+
#
|
15
|
+
|
16
|
+
def initialize(path)
|
17
|
+
depends_on "docx"
|
18
|
+
require "docx"
|
19
|
+
|
20
|
+
@path = path
|
21
|
+
end
|
22
|
+
|
23
|
+
# Check that the file is a `.docx` file
|
24
|
+
def loadable?
|
25
|
+
@path.to_s.end_with?(".docx")
|
26
|
+
end
|
27
|
+
|
28
|
+
def load
|
29
|
+
::Docx::Document
|
30
|
+
.open(@path.to_s)
|
31
|
+
.text
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/loaders/pdf.rb
CHANGED
data/lib/loaders/text.rb
CHANGED
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.8
|
4
|
+
version: 0.3.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dotenv-rails
|
@@ -38,34 +38,48 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: 3.10.0
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: chroma-db
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.3.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.3.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: cohere-ruby
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - "~>"
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.9.
|
61
|
+
version: 0.9.4
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - "~>"
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.9.
|
68
|
+
version: 0.9.4
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
70
|
+
name: docx
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
58
72
|
requirements:
|
59
73
|
- - "~>"
|
60
74
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.
|
75
|
+
version: 0.8.0
|
62
76
|
type: :development
|
63
77
|
prerelease: false
|
64
78
|
version_requirements: !ruby/object:Gem::Requirement
|
65
79
|
requirements:
|
66
80
|
- - "~>"
|
67
81
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.
|
82
|
+
version: 0.8.0
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: eqn
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -100,14 +114,14 @@ dependencies:
|
|
100
114
|
requirements:
|
101
115
|
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0.3.
|
117
|
+
version: 0.3.4
|
104
118
|
type: :development
|
105
119
|
prerelease: false
|
106
120
|
version_requirements: !ruby/object:Gem::Requirement
|
107
121
|
requirements:
|
108
122
|
- - "~>"
|
109
123
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0.3.
|
124
|
+
version: 0.3.4
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: milvus
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +164,20 @@ dependencies:
|
|
150
164
|
- - "~>"
|
151
165
|
- !ruby/object:Gem::Version
|
152
166
|
version: 0.1.6
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: replicate-ruby
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - ">="
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - ">="
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0'
|
153
181
|
- !ruby/object:Gem::Dependency
|
154
182
|
name: qdrant-ruby
|
155
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -236,8 +264,10 @@ files:
|
|
236
264
|
- lib/llm/cohere.rb
|
237
265
|
- lib/llm/hugging_face.rb
|
238
266
|
- lib/llm/openai.rb
|
267
|
+
- lib/llm/replicate.rb
|
239
268
|
- lib/loader.rb
|
240
269
|
- lib/loaders/base.rb
|
270
|
+
- lib/loaders/docx.rb
|
241
271
|
- lib/loaders/pdf.rb
|
242
272
|
- lib/loaders/text.rb
|
243
273
|
- lib/prompt/base.rb
|