langchainrb 0.6.15 → 0.6.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +4 -0
- data/lib/langchain/chunker/sentence.rb +29 -0
- data/lib/langchain/prompt/base.rb +1 -1
- data/lib/langchain/prompt/few_shot_prompt_template.rb +1 -0
- data/lib/langchain/vectorsearch/base.rb +31 -14
- data/lib/langchain/vectorsearch/chroma.rb +2 -2
- data/lib/langchain/vectorsearch/milvus.rb +1 -1
- data/lib/langchain/vectorsearch/pgvector.rb +1 -1
- data/lib/langchain/vectorsearch/pinecone.rb +1 -1
- data/lib/langchain/vectorsearch/prompts/hyde.yaml +10 -0
- data/lib/langchain/vectorsearch/prompts/rag.yaml +11 -0
- data/lib/langchain/vectorsearch/qdrant.rb +1 -1
- data/lib/langchain/vectorsearch/weaviate.rb +1 -1
- data/lib/langchain/version.rb +1 -1
- metadata +21 -5
- data/lib/langchain/utils/token_length/ollama_validator.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
|
4
|
+
data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
|
7
|
+
data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
|
|
97
97
|
)
|
98
98
|
```
|
99
99
|
```ruby
|
100
|
+
# Retrieve similar documents based on the query string passed in via the [HyDE technique](https://arxiv.org/abs/2212.10496)
|
101
|
+
client.similarity_search_with_hyde()
|
102
|
+
```
|
103
|
+
```ruby
|
100
104
|
# Retrieve similar documents based on the embedding passed in
|
101
105
|
client.similarity_search_by_vector(
|
102
106
|
embedding:,
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pragmatic_segmenter"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Chunker
|
7
|
+
#
|
8
|
+
# This chunker splits text by sentences.
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# Langchain::Chunker::Sentence.new(text).chunks
|
12
|
+
#
|
13
|
+
class Sentence < Base
|
14
|
+
attr_reader :text
|
15
|
+
|
16
|
+
# @param text [String]
|
17
|
+
# @return [Langchain::Chunker::Sentence]
|
18
|
+
def initialize(text)
|
19
|
+
@text = text
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [Array<String>]
|
23
|
+
def chunks
|
24
|
+
ps = PragmaticSegmenter::Segmenter.new(text: text)
|
25
|
+
ps.segment
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -34,7 +34,7 @@ module Langchain::Prompt
|
|
34
34
|
# @return [void]
|
35
35
|
#
|
36
36
|
def validate(template:, input_variables:)
|
37
|
-
input_variables_set =
|
37
|
+
input_variables_set = input_variables.uniq
|
38
38
|
variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
|
39
39
|
|
40
40
|
missing_variables = variables_from_template - input_variables_set
|
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
|
|
128
128
|
raise NotImplementedError, "#{self.class.name} does not support similarity search"
|
129
129
|
end
|
130
130
|
|
131
|
+
# Paper: https://arxiv.org/abs/2212.10496
|
132
|
+
# Hypothetical Document Embeddings (HyDE)-augmented similarity search
|
133
|
+
#
|
134
|
+
# @param query [String] The query to search for
|
135
|
+
# @param k [Integer] The number of results to return
|
136
|
+
# @return [String] Response
|
137
|
+
def similarity_search_with_hyde(query:, k: 4)
|
138
|
+
hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
|
139
|
+
similarity_search(query: hyde_completion, k: k)
|
140
|
+
end
|
141
|
+
|
131
142
|
# Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
|
132
143
|
# You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
|
133
144
|
def similarity_search_by_vector(...)
|
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
|
|
142
153
|
def_delegators :llm,
|
143
154
|
:default_dimension
|
144
155
|
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
)
|
153
|
-
examples: [
|
154
|
-
{context: context}
|
155
|
-
],
|
156
|
-
input_variables: ["question"],
|
157
|
-
example_separator: "\n"
|
156
|
+
# HyDE-style prompt
|
157
|
+
#
|
158
|
+
# @param [String] User's question
|
159
|
+
# @return [String] Prompt
|
160
|
+
def generate_hyde_prompt(question:)
|
161
|
+
prompt_template = Langchain::Prompt.load_from_path(
|
162
|
+
# Zero-shot prompt to generate a hypothetical document based on a given question
|
163
|
+
file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
|
158
164
|
)
|
159
|
-
|
160
165
|
prompt_template.format(question: question)
|
161
166
|
end
|
162
167
|
|
168
|
+
# Retrieval Augmented Generation (RAG)
|
169
|
+
#
|
170
|
+
# @param question [String] User's question
|
171
|
+
# @param context [String] The context to synthesize the answer from
|
172
|
+
# @return [String] Prompt
|
173
|
+
def generate_rag_prompt(question:, context:)
|
174
|
+
prompt_template = Langchain::Prompt.load_from_path(
|
175
|
+
file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
|
176
|
+
)
|
177
|
+
prompt_template.format(question: question, context: context)
|
178
|
+
end
|
179
|
+
|
163
180
|
def add_data(paths:)
|
164
181
|
raise ArgumentError, "Paths must be provided" if Array(paths).empty?
|
165
182
|
|
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
|
|
37
37
|
id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
|
38
38
|
embedding: llm.embed(text: text),
|
39
39
|
# TODO: Add support for passing metadata
|
40
|
-
metadata:
|
40
|
+
metadata: {}, # metadatas[index],
|
41
41
|
document: text # Do we actually need to store the whole original document?
|
42
42
|
)
|
43
43
|
end
|
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
|
|
124
124
|
|
125
125
|
context = context.join("\n---\n")
|
126
126
|
|
127
|
-
prompt =
|
127
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
128
128
|
|
129
129
|
llm.chat(prompt: prompt, &block)
|
130
130
|
end
|
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
|
|
148
148
|
|
149
149
|
context = content_data.join("\n---\n")
|
150
150
|
|
151
|
-
prompt =
|
151
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
152
152
|
|
153
153
|
llm.chat(prompt: prompt, &block)
|
154
154
|
end
|
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
|
|
144
144
|
end
|
145
145
|
context = context.join("\n---\n")
|
146
146
|
|
147
|
-
prompt =
|
147
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
148
148
|
|
149
149
|
llm.chat(prompt: prompt, &block)
|
150
150
|
end
|
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
|
|
177
177
|
end
|
178
178
|
context = context.join("\n---\n")
|
179
179
|
|
180
|
-
prompt =
|
180
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
181
181
|
|
182
182
|
llm.chat(prompt: prompt, &block)
|
183
183
|
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
|
2
|
+
_type: prompt
|
3
|
+
input_variables:
|
4
|
+
- question
|
5
|
+
template: |
|
6
|
+
Please write a passage to answer the question
|
7
|
+
|
8
|
+
Question: {question}
|
9
|
+
|
10
|
+
Passage:
|
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
|
|
134
134
|
end
|
135
135
|
context = context.join("\n---\n")
|
136
136
|
|
137
|
-
prompt =
|
137
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
138
138
|
|
139
139
|
llm.chat(prompt: prompt, &block)
|
140
140
|
end
|
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
|
|
134
134
|
end
|
135
135
|
context = context.join("\n---\n")
|
136
136
|
|
137
|
-
prompt =
|
137
|
+
prompt = generate_rag_prompt(question: question, context: context)
|
138
138
|
|
139
139
|
llm.chat(prompt: prompt, &block)
|
140
140
|
end
|
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.15
|
4
|
+
version: 0.6.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-10-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.1.
|
19
|
+
version: 0.1.9
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.1.
|
26
|
+
version: 0.1.9
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: colorize
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - '='
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: 2.6.11
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pragmatic_segmenter
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.3.0
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: 0.3.0
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: dotenv-rails
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -521,6 +535,7 @@ files:
|
|
521
535
|
- lib/langchain/ai_message.rb
|
522
536
|
- lib/langchain/chunker/base.rb
|
523
537
|
- lib/langchain/chunker/recursive_text.rb
|
538
|
+
- lib/langchain/chunker/sentence.rb
|
524
539
|
- lib/langchain/chunker/text.rb
|
525
540
|
- lib/langchain/contextual_logger.rb
|
526
541
|
- lib/langchain/conversation.rb
|
@@ -572,7 +587,6 @@ files:
|
|
572
587
|
- lib/langchain/utils/token_length/base_validator.rb
|
573
588
|
- lib/langchain/utils/token_length/cohere_validator.rb
|
574
589
|
- lib/langchain/utils/token_length/google_palm_validator.rb
|
575
|
-
- lib/langchain/utils/token_length/ollama_validator.rb
|
576
590
|
- lib/langchain/utils/token_length/openai_validator.rb
|
577
591
|
- lib/langchain/utils/token_length/token_limit_exceeded.rb
|
578
592
|
- lib/langchain/vectorsearch/base.rb
|
@@ -581,6 +595,8 @@ files:
|
|
581
595
|
- lib/langchain/vectorsearch/milvus.rb
|
582
596
|
- lib/langchain/vectorsearch/pgvector.rb
|
583
597
|
- lib/langchain/vectorsearch/pinecone.rb
|
598
|
+
- lib/langchain/vectorsearch/prompts/hyde.yaml
|
599
|
+
- lib/langchain/vectorsearch/prompts/rag.yaml
|
584
600
|
- lib/langchain/vectorsearch/qdrant.rb
|
585
601
|
- lib/langchain/vectorsearch/weaviate.rb
|
586
602
|
- lib/langchain/version.rb
|
@@ -1,16 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "tiktoken_ruby"
|
4
|
-
|
5
|
-
module Langchain
|
6
|
-
module Utils
|
7
|
-
module TokenLength
|
8
|
-
#
|
9
|
-
# This class is meant to validate the length of the text passed in to Ollama.
|
10
|
-
# It is used to validate the token length before the API call is made
|
11
|
-
#
|
12
|
-
class OllamaValidator < BaseValidator
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|