langchainrb 0.8.0 → 0.8.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +7 -5
- data/lib/langchain/chunker/markdown.rb +39 -0
- data/lib/langchain/data.rb +4 -3
- data/lib/langchain/llm/google_palm.rb +1 -1
- data/lib/langchain/llm/google_vertex_ai.rb +99 -5
- data/lib/langchain/llm/response/google_vertex_ai_response.rb +9 -0
- data/lib/langchain/llm/response/ollama_response.rb +1 -1
- data/lib/langchain/loader.rb +3 -2
- data/lib/langchain/output_parsers/output_fixing_parser.rb +1 -1
- data/lib/langchain/processors/markdown.rb +17 -0
- data/lib/langchain/prompt/loading.rb +1 -1
- data/lib/langchain/utils/token_length/ai21_validator.rb +4 -0
- data/lib/langchain/utils/token_length/base_validator.rb +1 -1
- data/lib/langchain/utils/token_length/cohere_validator.rb +4 -0
- data/lib/langchain/utils/token_length/google_palm_validator.rb +4 -0
- data/lib/langchain/utils/token_length/openai_validator.rb +41 -0
- data/lib/langchain/vectorsearch/base.rb +5 -3
- data/lib/langchain/vectorsearch/epsilla.rb +147 -0
- data/lib/langchain/vectorsearch/pinecone.rb +2 -2
- data/lib/langchain/version.rb +1 -1
- metadata +19 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13eec34cc529732ddfb8994956659bd4307a79ebfd76ff883fe3b6644d647c24
|
4
|
+
data.tar.gz: ce04acfe42a6a8da5a5951734651dd0083f7d2efc43cf4b3367710c8221ee96a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2094d99610311a1583d890f8c6898605bcd3e76d2fb72deb1ccd4b250f2b98f7a883401faf2e161b97b82fb29f6e64ead8843d8af22f0bd3e8a4c872c150c134
|
7
|
+
data.tar.gz: d7ce155cbb992e651aa8dc468ed1ee39bd96d1457f50faa11a32d7caac87086f5d8a381fc2b50aaba10ac934486ed415d5e609f47ee0426b4187540e2436b2e9
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.8.2]
|
4
|
+
- Introducing new `Langchain::Chunker::Markdown` chunker (thanks @spikex)
|
5
|
+
- Fixes
|
6
|
+
|
7
|
+
## [0.8.1]
|
8
|
+
- Support for Epsilla vector DB
|
9
|
+
- Fully functioning Google Vertex AI LLM
|
10
|
+
- Bug fixes
|
11
|
+
|
3
12
|
## [0.8.0]
|
4
13
|
- [BREAKING] Updated llama_cpp.rb to 0.9.4. The model file format used by the underlying llama.cpp library has changed to GGUF. llama.cpp ships with scripts to convert existing files and GGUF format models can be downloaded from HuggingFace.
|
5
14
|
- Introducing Langchain::LLM::GoogleVertexAi LLM provider
|
data/README.md
CHANGED
@@ -90,22 +90,22 @@ llm.embed(text: "foo bar")
|
|
90
90
|
|
91
91
|
Generate a text completion:
|
92
92
|
```ruby
|
93
|
-
llm.complete(prompt: "What is the meaning of life?")
|
93
|
+
llm.complete(prompt: "What is the meaning of life?").completion
|
94
94
|
```
|
95
95
|
|
96
96
|
Generate a chat completion:
|
97
97
|
```ruby
|
98
|
-
llm.chat(prompt: "Hey! How are you?")
|
98
|
+
llm.chat(prompt: "Hey! How are you?").completion
|
99
99
|
```
|
100
100
|
|
101
101
|
Summarize the text:
|
102
102
|
```ruby
|
103
|
-
llm.
|
103
|
+
llm.summarize(text: "...").completion
|
104
104
|
```
|
105
105
|
|
106
106
|
You can use any other LLM by invoking the same interface:
|
107
107
|
```ruby
|
108
|
-
llm = Langchain::LLM::GooglePalm.new(...)
|
108
|
+
llm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"], default_options: { ... })
|
109
109
|
```
|
110
110
|
|
111
111
|
### Prompt Management
|
@@ -251,7 +251,7 @@ Then parse the llm response:
|
|
251
251
|
|
252
252
|
```ruby
|
253
253
|
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
254
|
-
llm_response = llm.chat(prompt: prompt_text)
|
254
|
+
llm_response = llm.chat(prompt: prompt_text).completion
|
255
255
|
parser.parse(llm_response)
|
256
256
|
# {
|
257
257
|
# "name" => "Kim Ji-hyun",
|
@@ -310,6 +310,7 @@ Langchain.rb provides a convenient unified interface on top of supported vectors
|
|
310
310
|
| Database | Open-source | Cloud offering |
|
311
311
|
| -------- |:------------------:| :------------: |
|
312
312
|
| [Chroma](https://trychroma.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
|
313
|
+
| [Epsilla](https://epsilla.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
|
313
314
|
| [Hnswlib](https://github.com/nmslib/hnswlib/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ |
|
314
315
|
| [Milvus](https://milvus.io/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ Zilliz Cloud |
|
315
316
|
| [Pinecone](https://www.pinecone.io/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ |
|
@@ -342,6 +343,7 @@ client = Langchain::Vectorsearch::Weaviate.new(
|
|
342
343
|
You can instantiate any other supported vector search database:
|
343
344
|
```ruby
|
344
345
|
client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.6.0"`
|
346
|
+
client = Langchain::Vectorsearch::Epsilla.new(...) # `gem "epsilla-ruby", "~> 0.0.3"`
|
345
347
|
client = Langchain::Vectorsearch::Hnswlib.new(...) # `gem "hnswlib", "~> 0.8.1"`
|
346
348
|
client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.2"`
|
347
349
|
client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "baran"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Chunker
|
7
|
+
#
|
8
|
+
# Markdown-aware text chunker (splits using Baran::MarkdownSplitter)
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# Langchain::Chunker::Markdown.new(text).chunks
|
12
|
+
#
|
13
|
+
class Markdown < Base
|
14
|
+
attr_reader :text, :chunk_size, :chunk_overlap
|
15
|
+
|
16
|
+
# @param [String] text
|
17
|
+
# @param [Integer] chunk_size
|
18
|
+
# @param [Integer] chunk_overlap
|
19
|
+
# @param [String] separator
|
20
|
+
def initialize(text, chunk_size: 1000, chunk_overlap: 200)
|
21
|
+
@text = text
|
22
|
+
@chunk_size = chunk_size
|
23
|
+
@chunk_overlap = chunk_overlap
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [Array<Langchain::Chunk>]
|
27
|
+
def chunks
|
28
|
+
splitter = Baran::MarkdownSplitter.new(
|
29
|
+
chunk_size: chunk_size,
|
30
|
+
chunk_overlap: chunk_overlap
|
31
|
+
)
|
32
|
+
|
33
|
+
splitter.chunks(text).map do |chunk|
|
34
|
+
Langchain::Chunk.new(text: chunk[:text])
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/langchain/data.rb
CHANGED
@@ -9,9 +9,10 @@ module Langchain
|
|
9
9
|
|
10
10
|
# @param data [String] data that was loaded
|
11
11
|
# @param source [String, nil] URL or Path of the data source
|
12
|
-
def initialize(data,
|
13
|
-
@source =
|
12
|
+
def initialize(data, source: nil, chunker: Langchain::Chunker::Text)
|
13
|
+
@source = source
|
14
14
|
@data = data
|
15
|
+
@chunker_klass = chunker
|
15
16
|
end
|
16
17
|
|
17
18
|
# @return [String]
|
@@ -22,7 +23,7 @@ module Langchain
|
|
22
23
|
# @param opts [Hash] options passed to the chunker
|
23
24
|
# @return [Array<Langchain::Chunk>]
|
24
25
|
def chunks(opts = {})
|
25
|
-
|
26
|
+
@chunker_klass.new(@data, **opts).chunks
|
26
27
|
end
|
27
28
|
end
|
28
29
|
end
|
@@ -12,22 +12,30 @@ module Langchain::LLM
|
|
12
12
|
#
|
13
13
|
class GoogleVertexAi < Base
|
14
14
|
DEFAULTS = {
|
15
|
-
temperature: 0.
|
15
|
+
temperature: 0.1, # 0.1 is the default in the API, quite low ("grounded")
|
16
|
+
max_output_tokens: 1000,
|
17
|
+
top_p: 0.8,
|
18
|
+
top_k: 40,
|
16
19
|
dimension: 768,
|
20
|
+
completion_model_name: "text-bison", # Optional: text-bison@001
|
17
21
|
embeddings_model_name: "textembedding-gecko"
|
18
22
|
}.freeze
|
19
23
|
|
20
|
-
|
24
|
+
# Google Cloud has a project id and a specific region of deployment.
|
25
|
+
# For GenAI-related things, a safe choice is us-central1.
|
26
|
+
attr_reader :project_id, :client, :region
|
21
27
|
|
22
28
|
def initialize(project_id:, default_options: {})
|
23
29
|
depends_on "google-apis-aiplatform_v1"
|
24
30
|
|
25
31
|
@project_id = project_id
|
32
|
+
@region = default_options.fetch :region, "us-central1"
|
26
33
|
|
27
34
|
@client = Google::Apis::AiplatformV1::AiplatformService.new
|
28
35
|
|
29
36
|
# TODO: Adapt for other regions; Pass it in via the constructor
|
30
|
-
|
37
|
+
# For the moment only us-central1 available so no big deal.
|
38
|
+
@client.root_url = "https://#{@region}-aiplatform.googleapis.com/"
|
31
39
|
@client.authorization = Google::Auth.get_application_default
|
32
40
|
|
33
41
|
@defaults = DEFAULTS.merge(default_options)
|
@@ -37,7 +45,7 @@ module Langchain::LLM
|
|
37
45
|
# Generate an embedding for a given text
|
38
46
|
#
|
39
47
|
# @param text [String] The text to generate an embedding for
|
40
|
-
# @return [Langchain::LLM::
|
48
|
+
# @return [Langchain::LLM::GoogleVertexAiResponse] Response object
|
41
49
|
#
|
42
50
|
def embed(text:)
|
43
51
|
content = [{content: text}]
|
@@ -45,11 +53,97 @@ module Langchain::LLM
|
|
45
53
|
|
46
54
|
api_path = "projects/#{@project_id}/locations/us-central1/publishers/google/models/#{@defaults[:embeddings_model_name]}"
|
47
55
|
|
48
|
-
puts("api_path: #{api_path}")
|
56
|
+
# puts("api_path: #{api_path}")
|
49
57
|
|
50
58
|
response = client.predict_project_location_publisher_model(api_path, request)
|
51
59
|
|
52
60
|
Langchain::LLM::GoogleVertexAiResponse.new(response.to_h, model: @defaults[:embeddings_model_name])
|
53
61
|
end
|
62
|
+
|
63
|
+
#
|
64
|
+
# Generate a completion for a given prompt
|
65
|
+
#
|
66
|
+
# @param prompt [String] The prompt to generate a completion for
|
67
|
+
# @param params [Hash] Optional overrides; :temperature and :max_output_tokens are applied to the predict request
|
68
|
+
# @return [Langchain::LLM::GoogleVertexAiResponse] Response object
|
69
|
+
#
|
70
|
+
def complete(prompt:, **params)
|
71
|
+
default_params = {
|
72
|
+
prompt: prompt,
|
73
|
+
temperature: @defaults[:temperature],
|
74
|
+
top_k: @defaults[:top_k],
|
75
|
+
top_p: @defaults[:top_p],
|
76
|
+
max_output_tokens: @defaults[:max_output_tokens],
|
77
|
+
model: @defaults[:completion_model_name]
|
78
|
+
}
|
79
|
+
|
80
|
+
if params[:stop_sequences]
|
81
|
+
default_params[:stop_sequences] = params.delete(:stop_sequences)
|
82
|
+
end
|
83
|
+
|
84
|
+
if params[:max_output_tokens]
|
85
|
+
default_params[:max_output_tokens] = params.delete(:max_output_tokens)
|
86
|
+
end
|
87
|
+
|
88
|
+
# to be tested
|
89
|
+
temperature = params.delete(:temperature) || @defaults[:temperature]
|
90
|
+
max_output_tokens = default_params.fetch(:max_output_tokens, @defaults[:max_output_tokens])
|
91
|
+
|
92
|
+
default_params.merge!(params)
|
93
|
+
|
94
|
+
# response = client.generate_text(**default_params)
|
95
|
+
request = Google::Apis::AiplatformV1::GoogleCloudAiplatformV1PredictRequest.new \
|
96
|
+
instances: [{
|
97
|
+
prompt: prompt # key used to be :content, changed to :prompt
|
98
|
+
}],
|
99
|
+
parameters: {
|
100
|
+
temperature: temperature,
|
101
|
+
maxOutputTokens: max_output_tokens,
|
102
|
+
topP: 0.8,
|
103
|
+
topK: 40
|
104
|
+
}
|
105
|
+
|
106
|
+
response = client.predict_project_location_publisher_model \
|
107
|
+
"projects/#{project_id}/locations/us-central1/publishers/google/models/#{@defaults[:completion_model_name]}",
|
108
|
+
request
|
109
|
+
|
110
|
+
Langchain::LLM::GoogleVertexAiResponse.new(response, model: default_params[:model])
|
111
|
+
end
|
112
|
+
|
113
|
+
#
|
114
|
+
# Generate a summarization for a given text
|
115
|
+
#
|
116
|
+
# @param text [String] The text to generate a summarization for
|
117
|
+
# @return [Langchain::LLM::GoogleVertexAiResponse] Response object; call #completion for the summary text
|
118
|
+
#
|
119
|
+
# TODO(ricc): add params for Temp, topP, topK, MaxTokens and have it default to these 4 values.
|
120
|
+
def summarize(text:)
|
121
|
+
prompt_template = Langchain::Prompt.load_from_path(
|
122
|
+
file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.yaml")
|
123
|
+
)
|
124
|
+
prompt = prompt_template.format(text: text)
|
125
|
+
|
126
|
+
complete(
|
127
|
+
prompt: prompt,
|
128
|
+
# For best temperature, topP, topK, MaxTokens for summarization: see
|
129
|
+
# https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-summarization
|
130
|
+
temperature: 0.2,
|
131
|
+
top_p: 0.95,
|
132
|
+
top_k: 40,
|
133
|
+
# Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
|
134
|
+
max_output_tokens: 256
|
135
|
+
)
|
136
|
+
end
|
137
|
+
|
138
|
+
def chat(...)
|
139
|
+
# https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-chat
|
140
|
+
# Chat params: https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-chat
|
141
|
+
# \"temperature\": 0.3,\n"
|
142
|
+
# + " \"maxDecodeSteps\": 200,\n"
|
143
|
+
# + " \"topP\": 0.8,\n"
|
144
|
+
# + " \"topK\": 40\n"
|
145
|
+
# + "}";
|
146
|
+
raise NotImplementedError, "coming soon for Vertex AI.."
|
147
|
+
end
|
54
148
|
end
|
55
149
|
end
|
@@ -9,10 +9,19 @@ module Langchain::LLM
|
|
9
9
|
super(raw_response, model: model)
|
10
10
|
end
|
11
11
|
|
12
|
+
def completion
|
13
|
+
# completions&.dig(0, "output")
|
14
|
+
raw_response.predictions[0]["content"]
|
15
|
+
end
|
16
|
+
|
12
17
|
def embedding
|
13
18
|
embeddings.first
|
14
19
|
end
|
15
20
|
|
21
|
+
def completions
|
22
|
+
raw_response.predictions.map { |p| p["content"] }
|
23
|
+
end
|
24
|
+
|
16
25
|
def total_tokens
|
17
26
|
raw_response.dig(:predictions, 0, :embeddings, :statistics, :token_count)
|
18
27
|
end
|
data/lib/langchain/loader.rb
CHANGED
@@ -37,9 +37,10 @@ module Langchain
|
|
37
37
|
# @param path [String | Pathname] path to file or URL
|
38
38
|
# @param options [Hash] options passed to the processor class used to process the data
|
39
39
|
# @return [Langchain::Loader] loader instance
|
40
|
-
def initialize(path, options = {})
|
40
|
+
def initialize(path, options = {}, chunker: Langchain::Chunker::Text)
|
41
41
|
@options = options
|
42
42
|
@path = path
|
43
|
+
@chunker = chunker
|
43
44
|
end
|
44
45
|
|
45
46
|
# Is the path a URL?
|
@@ -112,7 +113,7 @@ module Langchain
|
|
112
113
|
processor_klass.new(@options).parse(@raw_data)
|
113
114
|
end
|
114
115
|
|
115
|
-
Langchain::Data.new(result)
|
116
|
+
Langchain::Data.new(result, source: @options[:source], chunker: @chunker)
|
116
117
|
end
|
117
118
|
|
118
119
|
def processor_klass
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Processors
|
5
|
+
class Markdown < Base
|
6
|
+
EXTENSIONS = [".markdown", ".md"]
|
7
|
+
CONTENT_TYPES = ["text/markdown"]
|
8
|
+
|
9
|
+
# Parse the document and return the text
|
10
|
+
# @param [File] data
|
11
|
+
# @return [String]
|
12
|
+
def parse(data)
|
13
|
+
data.read
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -33,7 +33,7 @@ module Langchain::Prompt
|
|
33
33
|
when ".json"
|
34
34
|
config = JSON.parse(File.read(file_path))
|
35
35
|
when ".yaml", ".yml"
|
36
|
-
config = YAML.
|
36
|
+
config = YAML.safe_load_file(file_path)
|
37
37
|
else
|
38
38
|
raise ArgumentError, "Got unsupported file type #{file_path.extname}"
|
39
39
|
end
|
@@ -31,6 +31,10 @@ module Langchain
|
|
31
31
|
TOKEN_LIMITS[model_name]
|
32
32
|
end
|
33
33
|
singleton_class.alias_method :completion_token_limit, :token_limit
|
34
|
+
|
35
|
+
def self.token_length_from_messages(messages, model_name, options)
|
36
|
+
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
37
|
+
end
|
34
38
|
end
|
35
39
|
end
|
36
40
|
end
|
@@ -14,7 +14,7 @@ module Langchain
|
|
14
14
|
class BaseValidator
|
15
15
|
def self.validate_max_tokens!(content, model_name, options = {})
|
16
16
|
text_token_length = if content.is_a?(Array)
|
17
|
-
content
|
17
|
+
token_length_from_messages(content, model_name, options)
|
18
18
|
else
|
19
19
|
token_length(content, model_name, options)
|
20
20
|
end
|
@@ -39,6 +39,10 @@ module Langchain
|
|
39
39
|
TOKEN_LIMITS[model_name]
|
40
40
|
end
|
41
41
|
singleton_class.alias_method :completion_token_limit, :token_limit
|
42
|
+
|
43
|
+
def self.token_length_from_messages(messages, model_name, options)
|
44
|
+
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
45
|
+
end
|
42
46
|
end
|
43
47
|
end
|
44
48
|
end
|
@@ -43,6 +43,10 @@ module Langchain
|
|
43
43
|
response.dig("tokenCount")
|
44
44
|
end
|
45
45
|
|
46
|
+
def self.token_length_from_messages(messages, model_name, options)
|
47
|
+
messages.sum { |message| token_length(message.to_json, model_name, options) }
|
48
|
+
end
|
49
|
+
|
46
50
|
def self.token_limit(model_name)
|
47
51
|
TOKEN_LIMITS.dig(model_name, "input_token_limit")
|
48
52
|
end
|
@@ -75,6 +75,47 @@ module Langchain
|
|
75
75
|
max_tokens = super(content, model_name, options)
|
76
76
|
[options[:max_tokens], max_tokens].reject(&:nil?).min
|
77
77
|
end
|
78
|
+
|
79
|
+
# Copied from https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
|
80
|
+
# Return the number of tokens used by a list of messages
|
81
|
+
#
|
82
|
+
# @param messages [Array<Hash>] The messages to calculate the token length for
|
83
|
+
# @param model_name [String] The model name to validate against
|
84
|
+
# @return [Integer] The token length of the messages
|
85
|
+
#
|
86
|
+
def self.token_length_from_messages(messages, model_name, options = {})
|
87
|
+
encoding = Tiktoken.encoding_for_model(model_name)
|
88
|
+
|
89
|
+
if ["gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k-0613", "gpt-4-0314", "gpt-4-32k-0314", "gpt-4-0613", "gpt-4-32k-0613"].include?(model_name)
|
90
|
+
tokens_per_message = 3
|
91
|
+
tokens_per_name = 1
|
92
|
+
elsif model_name == "gpt-3.5-turbo-0301"
|
93
|
+
tokens_per_message = 4 # every message follows {role/name}\n{content}\n
|
94
|
+
tokens_per_name = -1 # if there's a name, the role is omitted
|
95
|
+
elsif model_name.include?("gpt-3.5-turbo")
|
96
|
+
puts "Warning: gpt-3.5-turbo may update over time. Returning num tokens assuming gpt-3.5-turbo-0613."
|
97
|
+
return token_length_from_messages(messages, "gpt-3.5-turbo-0613", options)
|
98
|
+
elsif model_name.include?("gpt-4")
|
99
|
+
puts "Warning: gpt-4 may update over time. Returning num tokens assuming gpt-4-0613."
|
100
|
+
return token_length_from_messages(messages, "gpt-4-0613", options)
|
101
|
+
else
|
102
|
+
raise NotImplementedError.new(
|
103
|
+
"token_length_from_messages() is not implemented for model #{model_name}. See https://github.com/openai/openai-python/blob/main/chatml.md for information on how messages are converted to tokens."
|
104
|
+
)
|
105
|
+
end
|
106
|
+
|
107
|
+
num_tokens = 0
|
108
|
+
messages.each do |message|
|
109
|
+
num_tokens += tokens_per_message
|
110
|
+
message.each do |key, value|
|
111
|
+
num_tokens += encoding.encode(value).length
|
112
|
+
num_tokens += tokens_per_name if ["name", :name].include?(key)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
num_tokens += 3 # every reply is primed with assistant
|
117
|
+
num_tokens
|
118
|
+
end
|
78
119
|
end
|
79
120
|
end
|
80
121
|
end
|
@@ -7,6 +7,7 @@ module Langchain::Vectorsearch
|
|
7
7
|
# == Available vector databases
|
8
8
|
#
|
9
9
|
# - {Langchain::Vectorsearch::Chroma}
|
10
|
+
# - {Langchain::Vectorsearch::Epsilla}
|
10
11
|
# - {Langchain::Vectorsearch::Elasticsearch}
|
11
12
|
# - {Langchain::Vectorsearch::Hnswlib}
|
12
13
|
# - {Langchain::Vectorsearch::Milvus}
|
@@ -29,10 +30,11 @@ module Langchain::Vectorsearch
|
|
29
30
|
# )
|
30
31
|
#
|
31
32
|
# # You can instantiate other supported vector databases the same way:
|
33
|
+
# epsilla = Langchain::Vectorsearch::Epsilla.new(...)
|
32
34
|
# milvus = Langchain::Vectorsearch::Milvus.new(...)
|
33
35
|
# qdrant = Langchain::Vectorsearch::Qdrant.new(...)
|
34
36
|
# pinecone = Langchain::Vectorsearch::Pinecone.new(...)
|
35
|
-
#
|
37
|
+
# chroma = Langchain::Vectorsearch::Chroma.new(...)
|
36
38
|
# pgvector = Langchain::Vectorsearch::Pgvector.new(...)
|
37
39
|
#
|
38
40
|
# == Schema Creation
|
@@ -173,13 +175,13 @@ module Langchain::Vectorsearch
|
|
173
175
|
prompt_template.format(question: question, context: context)
|
174
176
|
end
|
175
177
|
|
176
|
-
def add_data(paths:)
|
178
|
+
def add_data(paths:, options: {}, chunker: Langchain::Chunker::Text)
|
177
179
|
raise ArgumentError, "Paths must be provided" if Array(paths).empty?
|
178
180
|
|
179
181
|
texts = Array(paths)
|
180
182
|
.flatten
|
181
183
|
.map do |path|
|
182
|
-
data = Langchain::Loader.new(path)&.load&.chunks
|
184
|
+
data = Langchain::Loader.new(path, options, chunker: chunker)&.load&.chunks
|
183
185
|
data.map { |chunk| chunk.text }
|
184
186
|
end
|
185
187
|
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "securerandom"
|
4
|
+
require "json"
|
5
|
+
require "timeout"
|
6
|
+
require "uri"
|
7
|
+
|
8
|
+
module Langchain::Vectorsearch
|
9
|
+
class Epsilla < Base
|
10
|
+
#
|
11
|
+
# Wrapper around Epsilla client library
|
12
|
+
#
|
13
|
+
# Gem requirements:
|
14
|
+
# gem "epsilla-ruby", "~> 0.0.3"
|
15
|
+
#
|
16
|
+
# Usage:
|
17
|
+
# epsilla = Langchain::Vectorsearch::Epsilla.new(url:, db_name:, db_path:, index_name:, llm:)
|
18
|
+
#
|
19
|
+
# Initialize Epsilla client
|
20
|
+
# @param url [String] URL to connect to the Epsilla db instance, protocol://host:port
|
21
|
+
# @param db_name [String] The name of the database to use
|
22
|
+
# @param db_path [String] The path to the database to use
|
23
|
+
# @param index_name [String] The name of the Epsilla table to use
|
24
|
+
# @param llm [Object] The LLM client to use
|
25
|
+
def initialize(url:, db_name:, db_path:, index_name:, llm:)
|
26
|
+
depends_on "epsilla-ruby", req: "epsilla"
|
27
|
+
|
28
|
+
uri = URI.parse(url)
|
29
|
+
protocol = uri.scheme
|
30
|
+
host = uri.host
|
31
|
+
port = uri.port
|
32
|
+
|
33
|
+
@client = ::Epsilla::Client.new(protocol, host, port)
|
34
|
+
|
35
|
+
Timeout.timeout(5) do
|
36
|
+
status_code, response = @client.database.load_db(db_name, db_path)
|
37
|
+
|
38
|
+
if status_code != 200
|
39
|
+
if status_code == 409 || (status_code == 500 && response["message"].include?("already loaded"))
|
40
|
+
# When db is already loaded, Epsilla may return HTTP 409 Conflict.
|
41
|
+
# This behavior is changed in https://github.com/epsilla-cloud/vectordb/pull/95
|
42
|
+
# Old behavior (HTTP 500) is preserved for backwards compatibility.
|
43
|
+
# It does not prevent us from using the db.
|
44
|
+
Langchain.logger.info("Database already loaded")
|
45
|
+
else
|
46
|
+
raise "Failed to load database: #{response}"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
@client.database.use_db(db_name)
|
52
|
+
|
53
|
+
@db_name = db_name
|
54
|
+
@db_path = db_path
|
55
|
+
@table_name = index_name
|
56
|
+
|
57
|
+
@vector_dimension = llm.default_dimension
|
58
|
+
|
59
|
+
super(llm: llm)
|
60
|
+
end
|
61
|
+
|
62
|
+
# Create a table using the index_name passed in the constructor
|
63
|
+
def create_default_schema
|
64
|
+
status_code, response = @client.database.create_table(@table_name, [
|
65
|
+
{"name" => "ID", "dataType" => "STRING", "primaryKey" => true},
|
66
|
+
{"name" => "Doc", "dataType" => "STRING"},
|
67
|
+
{"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimension}
|
68
|
+
])
|
69
|
+
raise "Failed to create table: #{response}" if status_code != 200
|
70
|
+
|
71
|
+
response
|
72
|
+
end
|
73
|
+
|
74
|
+
# Drop the table using the index_name passed in the constructor
|
75
|
+
def destroy_default_schema
|
76
|
+
status_code, response = @client.database.drop_table(@table_name)
|
77
|
+
raise "Failed to drop table: #{response}" if status_code != 200
|
78
|
+
|
79
|
+
response
|
80
|
+
end
|
81
|
+
|
82
|
+
# Add a list of texts to the database
|
83
|
+
# @param texts [Array<String>] The list of texts to add
|
84
|
+
# @param ids [Array<String>] The unique ids to add to the index, in the same order as the texts; if nil, it will be random uuids
|
85
|
+
def add_texts(texts:, ids: nil)
|
86
|
+
validated_ids = ids
|
87
|
+
if ids.nil?
|
88
|
+
validated_ids = texts.map { SecureRandom.uuid }
|
89
|
+
elsif ids.length != texts.length
|
90
|
+
raise "The number of ids must match the number of texts"
|
91
|
+
end
|
92
|
+
|
93
|
+
data = texts.map.with_index do |text, idx|
|
94
|
+
{Doc: text, Embedding: llm.embed(text: text).embedding, ID: validated_ids[idx]}
|
95
|
+
end
|
96
|
+
|
97
|
+
status_code, response = @client.database.insert(@table_name, data)
|
98
|
+
raise "Failed to insert texts: #{response}" if status_code != 200
|
99
|
+
response
|
100
|
+
end
|
101
|
+
|
102
|
+
# Search for similar texts
|
103
|
+
# @param query [String] The text to search for
|
104
|
+
# @param k [Integer] The number of results to return
|
105
|
+
# @return [Array<String>] The "Doc" value of each matching record
|
106
|
+
def similarity_search(query:, k: 4)
|
107
|
+
embedding = llm.embed(text: query).embedding
|
108
|
+
|
109
|
+
similarity_search_by_vector(
|
110
|
+
embedding: embedding,
|
111
|
+
k: k
|
112
|
+
)
|
113
|
+
end
|
114
|
+
|
115
|
+
# Search for entries by embedding
|
116
|
+
# @param embedding [Array<Float>] The embedding to search for
|
117
|
+
# @param k [Integer] The number of results to return
|
118
|
+
# @return [Array<String>] The "Doc" value of each matching record
|
119
|
+
def similarity_search_by_vector(embedding:, k: 4)
|
120
|
+
status_code, response = @client.database.query(@table_name, "Embedding", embedding, ["Doc"], k, false)
|
121
|
+
raise "Failed to do similarity search: #{response}" if status_code != 200
|
122
|
+
|
123
|
+
data = JSON.parse(response)["result"]
|
124
|
+
data.map { |result| result["Doc"] }
|
125
|
+
end
|
126
|
+
|
127
|
+
# Ask a question and return the answer
|
128
|
+
# @param question [String] The question to ask
|
129
|
+
# @param k [Integer] The number of results to have in context
|
130
|
+
# @yield [String] Stream responses back one String at a time
|
131
|
+
# @return [Object] The response returned by llm.chat, with its context set to the retrieved documents
|
132
|
+
def ask(question:, k: 4, &block)
|
133
|
+
search_results = similarity_search(query: question, k: k)
|
134
|
+
|
135
|
+
context = search_results.map do |result|
|
136
|
+
result.to_s
|
137
|
+
end
|
138
|
+
context = context.join("\n---\n")
|
139
|
+
|
140
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
141
|
+
|
142
|
+
response = llm.chat(prompt: prompt, &block)
|
143
|
+
response.context = context
|
144
|
+
response
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
@@ -64,13 +64,13 @@ module Langchain::Vectorsearch
|
|
64
64
|
index.upsert(vectors: vectors, namespace: namespace)
|
65
65
|
end
|
66
66
|
|
67
|
-
def add_data(paths:, namespace: "")
|
67
|
+
def add_data(paths:, namespace: "", options: {}, chunker: Langchain::Chunker::Text)
|
68
68
|
raise ArgumentError, "Paths must be provided" if Array(paths).empty?
|
69
69
|
|
70
70
|
texts = Array(paths)
|
71
71
|
.flatten
|
72
72
|
.map do |path|
|
73
|
-
data = Langchain::Loader.new(path)&.load&.chunks
|
73
|
+
data = Langchain::Loader.new(path, options, chunker: chunker)&.load&.chunks
|
74
74
|
data.map { |chunk| chunk.text }
|
75
75
|
end
|
76
76
|
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-12-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -276,6 +276,20 @@ dependencies:
|
|
276
276
|
- - "~>"
|
277
277
|
- !ruby/object:Gem::Version
|
278
278
|
version: 8.2.0
|
279
|
+
- !ruby/object:Gem::Dependency
|
280
|
+
name: epsilla-ruby
|
281
|
+
requirement: !ruby/object:Gem::Requirement
|
282
|
+
requirements:
|
283
|
+
- - "~>"
|
284
|
+
- !ruby/object:Gem::Version
|
285
|
+
version: 0.0.4
|
286
|
+
type: :development
|
287
|
+
prerelease: false
|
288
|
+
version_requirements: !ruby/object:Gem::Requirement
|
289
|
+
requirements:
|
290
|
+
- - "~>"
|
291
|
+
- !ruby/object:Gem::Version
|
292
|
+
version: 0.0.4
|
279
293
|
- !ruby/object:Gem::Dependency
|
280
294
|
name: eqn
|
281
295
|
requirement: !ruby/object:Gem::Requirement
|
@@ -604,6 +618,7 @@ files:
|
|
604
618
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
|
605
619
|
- lib/langchain/chunk.rb
|
606
620
|
- lib/langchain/chunker/base.rb
|
621
|
+
- lib/langchain/chunker/markdown.rb
|
607
622
|
- lib/langchain/chunker/prompts/semantic_prompt_template.yml
|
608
623
|
- lib/langchain/chunker/recursive_text.rb
|
609
624
|
- lib/langchain/chunker/semantic.rb
|
@@ -663,6 +678,7 @@ files:
|
|
663
678
|
- lib/langchain/processors/html.rb
|
664
679
|
- lib/langchain/processors/json.rb
|
665
680
|
- lib/langchain/processors/jsonl.rb
|
681
|
+
- lib/langchain/processors/markdown.rb
|
666
682
|
- lib/langchain/processors/pdf.rb
|
667
683
|
- lib/langchain/processors/text.rb
|
668
684
|
- lib/langchain/processors/xlsx.rb
|
@@ -688,6 +704,7 @@ files:
|
|
688
704
|
- lib/langchain/vectorsearch/base.rb
|
689
705
|
- lib/langchain/vectorsearch/chroma.rb
|
690
706
|
- lib/langchain/vectorsearch/elasticsearch.rb
|
707
|
+
- lib/langchain/vectorsearch/epsilla.rb
|
691
708
|
- lib/langchain/vectorsearch/hnswlib.rb
|
692
709
|
- lib/langchain/vectorsearch/milvus.rb
|
693
710
|
- lib/langchain/vectorsearch/pgvector.rb
|