langchainrb 0.6.19 → 0.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +26 -2
- data/lib/langchain/evals/ragas/answer_relevance.rb +71 -0
- data/lib/langchain/evals/ragas/context_relevance.rb +46 -0
- data/lib/langchain/evals/ragas/faithfulness.rb +83 -0
- data/lib/langchain/evals/ragas/main.rb +70 -0
- data/lib/langchain/evals/ragas/prompts/answer_relevance.yml +10 -0
- data/lib/langchain/evals/ragas/prompts/context_relevance.yml +10 -0
- data/lib/langchain/evals/ragas/prompts/faithfulness_statements_extraction.yml +9 -0
- data/lib/langchain/evals/ragas/prompts/faithfulness_statements_verification.yml +27 -0
- data/lib/langchain/utils/cosine_similarity.rb +34 -0
- data/lib/langchain/vectorsearch/base.rb +1 -2
- data/lib/langchain/vectorsearch/chroma.rb +1 -1
- data/lib/langchain/vectorsearch/pgvector.rb +0 -4
- data/lib/langchain/vectorsearch/pinecone.rb +1 -1
- data/lib/langchain/vectorsearch/qdrant.rb +1 -1
- data/lib/langchain/vectorsearch/weaviate.rb +1 -1
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -2
- metadata +43 -8
- data/lib/langchain/active_record/hooks.rb +0 -112
- data/lib/langchain/railtie.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21e6cb42af2a2a6892ab2c4dd76ad993b41574ca7a903702997ad20a9380ff6e
|
4
|
+
data.tar.gz: 620eb70528fb4bbeaf6c9b268717d491e4f74063ea4a897404d3ac429f9f1b93
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a82bf546ca46559c966e0669266b6f9b6184f01268b5c82ebfa312a400f9b2480479550fdf78341ccbd05a9c170a44ae0730fb3b9ea594f6d8bd59484b7699b
|
7
|
+
data.tar.gz: cae88e17f88a407c16caa29b69b61fcede6e1655c05d1b1710496852c921e036bf1d732dc31d391b07deb402ec44098f36831ed7305e7789dff223b440db0438
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.7.1] - 2023-10-26
|
4
|
+
- Ragas evals tool to evaluate Retrieval Augmented Generation (RAG) pipelines
|
5
|
+
|
6
|
+
## [0.7.0] - 2023-10-22
|
7
|
+
- BREAKING: Moving Rails-specific code to `langchainrb_rails` gem
|
8
|
+
|
3
9
|
## [0.6.19] - 2023-10-18
|
4
10
|
- Elasticsearch vector search support
|
5
11
|
- Fix `lib/langchain/railtie.rb` not being loaded with the gem
|
data/README.md
CHANGED
@@ -4,8 +4,6 @@
|
|
4
4
|
|
5
5
|
👨💻👩💻 CURRENTLY SEEKING PEOPLE TO FORM THE CORE GROUP OF MAINTAINERS WITH
|
6
6
|
|
7
|
-
:warning: UNDER ACTIVE AND RAPID DEVELOPMENT (MAY BE BUGGY AND UNTESTED)
|
8
|
-
|
9
7
|
![Tests status](https://github.com/andreibondarev/langchainrb/actions/workflows/ci.yml/badge.svg?branch=main)
|
10
8
|
[![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
|
11
9
|
[![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
|
@@ -521,6 +519,32 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
|
|
521
519
|
## Examples
|
522
520
|
Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
|
523
521
|
|
522
|
+
## Evaluations (Evals)
|
523
|
+
The Evaluations module is a collection of tools that can be used to evaluate and track the performance of the output produced by LLMs and your RAG (Retrieval Augmented Generation) pipelines.
|
524
|
+
|
525
|
+
### RAGAS
|
526
|
+
Ragas helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. The implementation is based on this [paper](https://arxiv.org/abs/2309.15217) and the original Python [repo](https://github.com/explodinggradients/ragas). Ragas tracks the following 3 metrics and assigns each a score between 0.0 and 1.0:
|
527
|
+
* Faithfulness - the answer is grounded in the given context
|
528
|
+
* Context Relevance - the retrieved context is focused, containing as little irrelevant information as possible
|
529
|
+
* Answer Relevance - the generated answer addresses the actual question that was provided
|
530
|
+
|
531
|
+
```ruby
|
532
|
+
# We recommend using Langchain::LLM::OpenAI as your llm for Ragas
|
533
|
+
ragas = Langchain::Evals::Ragas::Main.new(llm: llm)
|
534
|
+
|
535
|
+
# The answer that the LLM generated
|
536
|
+
# The question (or the original prompt) that was asked
|
537
|
+
# The context that was retrieved (usually from a vectorsearch database)
|
538
|
+
ragas.score(answer: "", question: "", context: "")
|
539
|
+
# =>
|
540
|
+
# {
|
541
|
+
# ragas_score: 0.6601257446503674,
|
542
|
+
# answer_relevance_score: 0.9573145866787608,
|
543
|
+
# context_relevance_score: 0.6666666666666666,
|
544
|
+
# faithfulness_score: 0.5
|
545
|
+
# }
|
546
|
+
```
|
547
|
+
|
524
548
|
## Logging
|
525
549
|
|
526
550
|
LangChain.rb uses standard logging mechanisms and defaults to `:warn` level. Most messages are at info level, but we will add debug or warn statements as needed.
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "matrix"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Evals
|
7
|
+
module Ragas
|
8
|
+
# Answer Relevance refers to the idea that the generated answer should address the actual question that was provided.
|
9
|
+
# This metric evaluates how closely the generated answer aligns with the initial question or instruction.
|
10
|
+
class AnswerRelevance
|
11
|
+
attr_reader :llm, :batch_size
|
12
|
+
|
13
|
+
# @param llm [Langchain::LLM::*] Langchain::LLM::* object
|
14
|
+
# @param batch_size [Integer] Batch size, i.e., number of generated questions to compare to the original question
|
15
|
+
def initialize(llm:, batch_size: 3)
|
16
|
+
@llm = llm
|
17
|
+
@batch_size = batch_size
|
18
|
+
end
|
19
|
+
|
20
|
+
# @param question [String] Question
|
21
|
+
# @param answer [String] Answer
|
22
|
+
# @return [Float] Answer Relevance score
|
23
|
+
def score(question:, answer:)
|
24
|
+
generated_questions = []
|
25
|
+
|
26
|
+
batch_size.times do |i|
|
27
|
+
prompt = answer_relevance_prompt_template.format(
|
28
|
+
question: question,
|
29
|
+
answer: answer
|
30
|
+
)
|
31
|
+
generated_questions << llm.complete(prompt: prompt).completion
|
32
|
+
end
|
33
|
+
|
34
|
+
scores = generated_questions.map do |generated_question|
|
35
|
+
calculate_similarity(original_question: question, generated_question: generated_question)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Find the mean
|
39
|
+
scores.sum(0.0) / scores.size
|
40
|
+
end
|
41
|
+
|
42
|
+
private
|
43
|
+
|
44
|
+
# @param original_question [String] The question that was originally asked
|
45
|
+
# @param generated_question [String] The question the LLM generated from the answer
|
46
|
+
# @return [Float] Dot product similarity between the two questions
|
47
|
+
def calculate_similarity(original_question:, generated_question:)
|
48
|
+
original_embedding = generate_embedding(original_question)
|
49
|
+
generated_embedding = generate_embedding(generated_question)
|
50
|
+
|
51
|
+
vector_1 = Vector.elements(original_embedding)
|
52
|
+
vector_2 = Vector.elements(generated_embedding)
|
53
|
+
vector_1.inner_product(vector_2)
|
54
|
+
end
|
55
|
+
|
56
|
+
# @param text [String] Text to generate an embedding for
|
57
|
+
# @return [Array<Float>] Embedding
|
58
|
+
def generate_embedding(text)
|
59
|
+
llm.embed(text: text).embedding
|
60
|
+
end
|
61
|
+
|
62
|
+
# @return [PromptTemplate] PromptTemplate instance
|
63
|
+
def answer_relevance_prompt_template
|
64
|
+
@template ||= Langchain::Prompt.load_from_path(
|
65
|
+
file_path: Langchain.root.join("langchain/evals/ragas/prompts/answer_relevance.yml")
|
66
|
+
)
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "pragmatic_segmenter"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Evals
|
7
|
+
module Ragas
|
8
|
+
# Context Relevance refers to the idea that the retrieved context should be focused, containing as little irrelevant information as possible.
|
9
|
+
class ContextRelevance
|
10
|
+
attr_reader :llm
|
11
|
+
|
12
|
+
# @param llm [Langchain::LLM::*] Langchain::LLM::* object
|
13
|
+
def initialize(llm:)
|
14
|
+
@llm = llm
|
15
|
+
end
|
16
|
+
|
17
|
+
# @param question [String] Question
|
18
|
+
# @param context [String] Context
|
19
|
+
# @return [Float] Context Relevance score
|
20
|
+
def score(question:, context:)
|
21
|
+
prompt = context_relevance_prompt_template.format(
|
22
|
+
question: question,
|
23
|
+
context: context
|
24
|
+
)
|
25
|
+
sentences = llm.complete(prompt: prompt).completion
|
26
|
+
|
27
|
+
(sentence_count(sentences).to_f / sentence_count(context).to_f)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def sentence_count(context)
|
33
|
+
ps = PragmaticSegmenter::Segmenter.new(text: context)
|
34
|
+
ps.segment.length
|
35
|
+
end
|
36
|
+
|
37
|
+
# @return [PromptTemplate] PromptTemplate instance
|
38
|
+
def context_relevance_prompt_template
|
39
|
+
@template ||= Langchain::Prompt.load_from_path(
|
40
|
+
file_path: Langchain.root.join("langchain/evals/ragas/prompts/context_relevance.yml")
|
41
|
+
)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Evals
|
5
|
+
module Ragas
|
6
|
+
# Faithfulness refers to the idea that the answer should be grounded in the given context,
|
7
|
+
# ensuring that the retrieved context can act as a justification for the generated answer.
|
8
|
+
# The answer is faithful to the context if the claims that are made in the answer can be inferred from the context.
|
9
|
+
#
|
10
|
+
# Score calculation:
|
11
|
+
# F = |V| / |S|
|
12
|
+
#
|
13
|
+
# F = Faithfulness
|
14
|
+
# |V| = Number of statements that were supported according to the LLM
|
15
|
+
# |S| = Total number of statements extracted.
|
16
|
+
#
|
17
|
+
class Faithfulness
|
18
|
+
attr_reader :llm
|
19
|
+
|
20
|
+
# @param llm [Langchain::LLM::*] Langchain::LLM::* object
|
21
|
+
def initialize(llm:)
|
22
|
+
@llm = llm
|
23
|
+
end
|
24
|
+
|
25
|
+
# @param question [String] Question
|
26
|
+
# @param answer [String] Answer
|
27
|
+
# @param context [String] Context
|
28
|
+
# @return [Float] Faithfulness score
|
29
|
+
def score(question:, answer:, context:)
|
30
|
+
statements = statements_extraction(question: question, answer: answer)
|
31
|
+
statements_count = statements
|
32
|
+
.split("\n")
|
33
|
+
.count
|
34
|
+
|
35
|
+
verifications = statements_verification(statements: statements, context: context)
|
36
|
+
verifications_count = count_verified_statements(verifications)
|
37
|
+
|
38
|
+
(verifications_count.to_f / statements_count.to_f)
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def count_verified_statements(verifications)
|
44
|
+
match = verifications.match(/Final verdict for each statement in order:\s*(.*)/)
|
45
|
+
verdicts = match.captures.first
|
46
|
+
verdicts
|
47
|
+
.split(".")
|
48
|
+
.count { |value| value.strip.to_boolean }
|
49
|
+
end
|
50
|
+
|
51
|
+
def statements_verification(statements:, context:)
|
52
|
+
prompt = statements_verification_prompt_template.format(
|
53
|
+
statements: statements,
|
54
|
+
context: context
|
55
|
+
)
|
56
|
+
llm.complete(prompt: prompt).completion
|
57
|
+
end
|
58
|
+
|
59
|
+
def statements_extraction(question:, answer:)
|
60
|
+
prompt = statements_extraction_prompt_template.format(
|
61
|
+
question: question,
|
62
|
+
answer: answer
|
63
|
+
)
|
64
|
+
llm.complete(prompt: prompt).completion
|
65
|
+
end
|
66
|
+
|
67
|
+
# @return [PromptTemplate] PromptTemplate instance
|
68
|
+
def statements_verification_prompt_template
|
69
|
+
@template_two ||= Langchain::Prompt.load_from_path(
|
70
|
+
file_path: Langchain.root.join("langchain/evals/ragas/prompts/faithfulness_statements_verification.yml")
|
71
|
+
)
|
72
|
+
end
|
73
|
+
|
74
|
+
# @return [PromptTemplate] PromptTemplate instance
|
75
|
+
def statements_extraction_prompt_template
|
76
|
+
@template_one ||= Langchain::Prompt.load_from_path(
|
77
|
+
file_path: Langchain.root.join("langchain/evals/ragas/prompts/faithfulness_statements_extraction.yml")
|
78
|
+
)
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Evals
|
5
|
+
# RAGAS (Retrieval Augmented Generation Assessment) is a framework for evaluating RAG (Retrieval Augmented Generation) pipelines.
|
6
|
+
# Based on the following research: https://arxiv.org/pdf/2309.15217.pdf
|
7
|
+
module Ragas
|
8
|
+
class Main
|
9
|
+
attr_reader :llm
|
10
|
+
|
11
|
+
def initialize(llm:)
|
12
|
+
@llm = llm
|
13
|
+
end
|
14
|
+
|
15
|
+
# Returns the RAGAS scores, e.g.:
|
16
|
+
# {
|
17
|
+
# ragas_score: 0.6601257446503674,
|
18
|
+
# answer_relevance_score: 0.9573145866787608,
|
19
|
+
# context_relevance_score: 0.6666666666666666,
|
20
|
+
# faithfulness_score: 0.5
|
21
|
+
# }
|
22
|
+
#
|
23
|
+
# @param question [String] Question
|
24
|
+
# @param answer [String] Answer
|
25
|
+
# @param context [String] Context
|
26
|
+
# @return [Hash] RAGAS scores
|
27
|
+
def score(question:, answer:, context:)
|
28
|
+
answer_relevance_score = answer_relevance.score(question: question, answer: answer)
|
29
|
+
context_relevance_score = context_relevance.score(question: question, context: context)
|
30
|
+
faithfulness_score = faithfulness.score(question: question, answer: answer, context: context)
|
31
|
+
|
32
|
+
{
|
33
|
+
ragas_score: ragas_score(answer_relevance_score, context_relevance_score, faithfulness_score),
|
34
|
+
answer_relevance_score: answer_relevance_score,
|
35
|
+
context_relevance_score: context_relevance_score,
|
36
|
+
faithfulness_score: faithfulness_score
|
37
|
+
}
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# Overall RAGAS score (harmonic mean): https://github.com/explodinggradients/ragas/blob/1dd363e3e54744e67b0be85962a0258d8121500a/src/ragas/evaluation.py#L140-L143
|
43
|
+
#
|
44
|
+
# @param answer_relevance_score [Float] Answer Relevance score
|
45
|
+
# @param context_relevance_score [Float] Context Relevance score
|
46
|
+
# @param faithfulness_score [Float] Faithfulness score
|
47
|
+
# @return [Float] RAGAS score
|
48
|
+
def ragas_score(answer_relevance_score, context_relevance_score, faithfulness_score)
|
49
|
+
reciprocal_sum = (1.0 / answer_relevance_score) + (1.0 / context_relevance_score) + (1.0 / faithfulness_score)
|
50
|
+
(3 / reciprocal_sum)
|
51
|
+
end
|
52
|
+
|
53
|
+
# @return [Langchain::Evals::Ragas::AnswerRelevance] Class instance
|
54
|
+
def answer_relevance
|
55
|
+
@answer_relevance ||= Langchain::Evals::Ragas::AnswerRelevance.new(llm: llm)
|
56
|
+
end
|
57
|
+
|
58
|
+
# @return [Langchain::Evals::Ragas::ContextRelevance] Class instance
|
59
|
+
def context_relevance
|
60
|
+
@context_relevance ||= Langchain::Evals::Ragas::ContextRelevance.new(llm: llm)
|
61
|
+
end
|
62
|
+
|
63
|
+
# @return [Langchain::Evals::Ragas::Faithfulness] Class instance
|
64
|
+
def faithfulness
|
65
|
+
@faithfulness ||= Langchain::Evals::Ragas::Faithfulness.new(llm: llm)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
_type: prompt
|
2
|
+
input_variables:
|
3
|
+
- answer
|
4
|
+
template: |
|
5
|
+
Generate question for the given answer.
|
6
|
+
Answer: The PSLV-C56 mission is scheduled to be launched on Sunday, 30 July 2023 at 06:30 IST / 01:00 UTC. It will be launched from the Satish Dhawan Space Centre, Sriharikota, Andhra Pradesh, India
|
7
|
+
Question: When is the scheduled launch date and time for the PSLV-C56 mission, and where will it be launched from?
|
8
|
+
|
9
|
+
Answer: {answer}
|
10
|
+
Question:
|
@@ -0,0 +1,10 @@
|
|
1
|
+
_type: prompt
|
2
|
+
input_variables:
|
3
|
+
- question
|
4
|
+
- context
|
5
|
+
template: |
|
6
|
+
Please extract relevant sentences from the provided context that is absolutely required answer the following question. If no relevant sentences are found, or if you believe the question cannot be answered from the given context, return the phrase "Insufficient Information". While extracting candidate sentences you're not allowed to make any changes to sentences from given context.
|
7
|
+
|
8
|
+
question:{question}
|
9
|
+
context:\n{context}
|
10
|
+
candidate sentences:\n
|
@@ -0,0 +1,27 @@
|
|
1
|
+
_type: prompt
|
2
|
+
input_variables:
|
3
|
+
- statements
|
4
|
+
- context
|
5
|
+
template: |
|
6
|
+
Consider the given context and following statements, then determine whether they are supported by the information present in the context.
|
7
|
+
Provide a brief explanation for each statement before arriving at the verdict (Yes/No). Provide a final verdict for each statement in order at the end in the given format.
|
8
|
+
Do not deviate from the specified format.
|
9
|
+
|
10
|
+
Context:\nJohn is a student at XYZ University. He is pursuing a degree in Computer Science. He is enrolled in several courses this semester, including Data Structures, Algorithms, and Database Management. John is a diligent student and spends a significant amount of time studying and completing assignments. He often stays late in the library to work on his projects.
|
11
|
+
statements:\n1. John is majoring in Biology.\n2. John is taking a course on Artificial Intelligence.\n3. John is a dedicated student.\n4. John has a part-time job.\n5. John is interested in computer programming.\n
|
12
|
+
Answer:
|
13
|
+
1. John is majoring in Biology.
|
14
|
+
Explanation: John's major is explicitly mentioned as Computer Science. There is no information suggesting he is majoring in Biology. Verdict: No.
|
15
|
+
2. John is taking a course on Artificial Intelligence.
|
16
|
+
Explanation: The context mentions the courses John is currently enrolled in, and Artificial Intelligence is not mentioned. Therefore, it cannot be deduced that John is taking a course on AI. Verdict: No.
|
17
|
+
3. John is a dedicated student.
|
18
|
+
Explanation: The prompt states that he spends a significant amount of time studying and completing assignments. Additionally, it mentions that he often stays late in the library to work on his projects, which implies dedication. Verdict: Yes.
|
19
|
+
4. John has a part-time job.
|
20
|
+
Explanation: There is no information given in the context about John having a part-time job. Therefore, it cannot be deduced that John has a part-time job. Verdict: No.
|
21
|
+
5. John is interested in computer programming.
|
22
|
+
Explanation: The context states that John is pursuing a degree in Computer Science, which implies an interest in computer programming. Verdict: Yes.
|
23
|
+
Final verdict for each statement in order: No. No. Yes. No. Yes.
|
24
|
+
|
25
|
+
context:\n{context}
|
26
|
+
statements:\n{statements}
|
27
|
+
Answer:
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Utils
|
5
|
+
class CosineSimilarity
|
6
|
+
attr_reader :vector_a, :vector_b
|
7
|
+
|
8
|
+
# @param vector_a [Array<Float>] First vector
|
9
|
+
# @param vector_b [Array<Float>] Second vector
|
10
|
+
def initialize(vector_a, vector_b)
|
11
|
+
@vector_a = vector_a
|
12
|
+
@vector_b = vector_b
|
13
|
+
end
|
14
|
+
|
15
|
+
# Calculate the cosine similarity between two vectors
|
16
|
+
# @return [Float, nil] The cosine similarity between the two vectors, or nil if either input is not an Array or the sizes differ
|
17
|
+
def calculate_similarity
|
18
|
+
return nil unless vector_a.is_a? Array
|
19
|
+
return nil unless vector_b.is_a? Array
|
20
|
+
return nil if vector_a.size != vector_b.size
|
21
|
+
|
22
|
+
dot_product = 0
|
23
|
+
vector_a.zip(vector_b).each do |v1i, v2i|
|
24
|
+
dot_product += v1i * v2i
|
25
|
+
end
|
26
|
+
|
27
|
+
a = vector_a.map { |n| n**2 }.reduce(:+)
|
28
|
+
b = vector_b.map { |n| n**2 }.reduce(:+)
|
29
|
+
|
30
|
+
dot_product / (Math.sqrt(a) * Math.sqrt(b))
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -25,8 +25,7 @@ module Langchain::Vectorsearch
|
|
25
25
|
# url: ENV["WEAVIATE_URL"],
|
26
26
|
# api_key: ENV["WEAVIATE_API_KEY"],
|
27
27
|
# index_name: "Documents",
|
28
|
-
# llm: :
|
29
|
-
# llm_api_key: ENV["OPENAI_API_KEY"] # API key for the selected LLM
|
28
|
+
# llm: Langchain::LLM::OpenAI.new(api_key:)
|
30
29
|
# )
|
31
30
|
#
|
32
31
|
# # You can instantiate other supported vector databases the same way:
|
@@ -9,7 +9,7 @@ module Langchain::Vectorsearch
|
|
9
9
|
# gem "chroma-db", "~> 0.6.0"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
# chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:,
|
12
|
+
# chroma = Langchain::Vectorsearch::Chroma.new(url:, index_name:, llm:, api_key: nil)
|
13
13
|
#
|
14
14
|
|
15
15
|
# Initialize the Chroma client
|
@@ -9,7 +9,7 @@ module Langchain::Vectorsearch
|
|
9
9
|
# gem "pinecone", "~> 0.1.6"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
# pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm
|
12
|
+
# pinecone = Langchain::Vectorsearch::Pinecone.new(environment:, api_key:, index_name:, llm:)
|
13
13
|
#
|
14
14
|
|
15
15
|
# Initialize the Pinecone client
|
@@ -9,7 +9,7 @@ module Langchain::Vectorsearch
|
|
9
9
|
# gem "qdrant-ruby", "~> 0.9.3"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
# qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm
|
12
|
+
# qdrant = Langchain::Vectorsearch::Qdrant.new(url:, api_key:, index_name:, llm:)
|
13
13
|
#
|
14
14
|
|
15
15
|
# Initialize the Qdrant client
|
@@ -9,7 +9,7 @@ module Langchain::Vectorsearch
|
|
9
9
|
# gem "weaviate-ruby", "~> 0.8.3"
|
10
10
|
#
|
11
11
|
# Usage:
|
12
|
-
# weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm
|
12
|
+
# weaviate = Langchain::Vectorsearch::Weaviate.new(url:, api_key:, index_name:, llm:)
|
13
13
|
#
|
14
14
|
|
15
15
|
# Initialize the Weaviate adapter
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "logger"
|
4
4
|
require "pathname"
|
5
5
|
require "colorize"
|
6
|
+
require "to_bool"
|
6
7
|
require "zeitwerk"
|
7
8
|
loader = Zeitwerk::Loader.for_gem
|
8
9
|
loader.ignore("#{__dir__}/langchainrb.rb")
|
@@ -89,5 +90,3 @@ module Langchain
|
|
89
90
|
class BaseError < StandardError; end
|
90
91
|
end
|
91
92
|
end
|
92
|
-
|
93
|
-
require "langchain/railtie" if defined?(Rails)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-10-
|
11
|
+
date: 2023-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -70,16 +70,16 @@ dependencies:
|
|
70
70
|
name: zeitwerk
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- -
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: 2.
|
75
|
+
version: '2.5'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- -
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: 2.
|
82
|
+
version: '2.5'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: pragmatic_segmenter
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,6 +94,34 @@ dependencies:
|
|
94
94
|
- - "~>"
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: 0.3.0
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: to_bool
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - "~>"
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 2.0.0
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 2.0.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: matrix
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: dotenv-rails
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -539,7 +567,6 @@ files:
|
|
539
567
|
- LICENSE.txt
|
540
568
|
- README.md
|
541
569
|
- lib/langchain.rb
|
542
|
-
- lib/langchain/active_record/hooks.rb
|
543
570
|
- lib/langchain/agent/base.rb
|
544
571
|
- lib/langchain/agent/react_agent.rb
|
545
572
|
- lib/langchain/agent/react_agent/react_agent_prompt.yaml
|
@@ -562,6 +589,14 @@ files:
|
|
562
589
|
- lib/langchain/conversation/response.rb
|
563
590
|
- lib/langchain/data.rb
|
564
591
|
- lib/langchain/dependency_helper.rb
|
592
|
+
- lib/langchain/evals/ragas/answer_relevance.rb
|
593
|
+
- lib/langchain/evals/ragas/context_relevance.rb
|
594
|
+
- lib/langchain/evals/ragas/faithfulness.rb
|
595
|
+
- lib/langchain/evals/ragas/main.rb
|
596
|
+
- lib/langchain/evals/ragas/prompts/answer_relevance.yml
|
597
|
+
- lib/langchain/evals/ragas/prompts/context_relevance.yml
|
598
|
+
- lib/langchain/evals/ragas/prompts/faithfulness_statements_extraction.yml
|
599
|
+
- lib/langchain/evals/ragas/prompts/faithfulness_statements_verification.yml
|
565
600
|
- lib/langchain/llm/ai21.rb
|
566
601
|
- lib/langchain/llm/anthropic.rb
|
567
602
|
- lib/langchain/llm/base.rb
|
@@ -601,7 +636,6 @@ files:
|
|
601
636
|
- lib/langchain/prompt/few_shot_prompt_template.rb
|
602
637
|
- lib/langchain/prompt/loading.rb
|
603
638
|
- lib/langchain/prompt/prompt_template.rb
|
604
|
-
- lib/langchain/railtie.rb
|
605
639
|
- lib/langchain/tool/base.rb
|
606
640
|
- lib/langchain/tool/calculator.rb
|
607
641
|
- lib/langchain/tool/database.rb
|
@@ -609,6 +643,7 @@ files:
|
|
609
643
|
- lib/langchain/tool/ruby_code_interpreter.rb
|
610
644
|
- lib/langchain/tool/weather.rb
|
611
645
|
- lib/langchain/tool/wikipedia.rb
|
646
|
+
- lib/langchain/utils/cosine_similarity.rb
|
612
647
|
- lib/langchain/utils/token_length/ai21_validator.rb
|
613
648
|
- lib/langchain/utils/token_length/base_validator.rb
|
614
649
|
- lib/langchain/utils/token_length/cohere_validator.rb
|
@@ -1,112 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Langchain
|
4
|
-
module ActiveRecord
|
5
|
-
# This module adds the following functionality to your ActiveRecord models:
|
6
|
-
# * `vectorsearch` class method to set the vector search provider
|
7
|
-
# * `similarity_search` class method to search for similar texts
|
8
|
-
# * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
|
9
|
-
#
|
10
|
-
# Usage:
|
11
|
-
# class Recipe < ActiveRecord::Base
|
12
|
-
# vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
|
13
|
-
# api_key: ENV["WEAVIATE_API_KEY"],
|
14
|
-
# url: ENV["WEAVIATE_URL"],
|
15
|
-
# index_name: "Recipes",
|
16
|
-
# llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
17
|
-
# )
|
18
|
-
#
|
19
|
-
# after_save :upsert_to_vectorsearch
|
20
|
-
#
|
21
|
-
# # Overwriting how the model is serialized before it's indexed
|
22
|
-
# def as_vector
|
23
|
-
# [
|
24
|
-
# "Title: #{title}",
|
25
|
-
# "Description: #{description}",
|
26
|
-
# ...
|
27
|
-
# ]
|
28
|
-
# .compact
|
29
|
-
# .join("\n")
|
30
|
-
# end
|
31
|
-
# end
|
32
|
-
#
|
33
|
-
# Create the default schema
|
34
|
-
# Recipe.class_variable_get(:@@provider).create_default_schema
|
35
|
-
# Query the vector search provider
|
36
|
-
# Recipe.similarity_search("carnivore dish")
|
37
|
-
# Delete the default schema to start over
|
38
|
-
# Recipe.class_variable_get(:@@provider).destroy_default_schema
|
39
|
-
#
|
40
|
-
module Hooks
|
41
|
-
def self.included(base)
|
42
|
-
base.extend ClassMethods
|
43
|
-
end
|
44
|
-
|
45
|
-
# Index the text to the vector search provider
|
46
|
-
# You'd typically call this method in an ActiveRecord callback
|
47
|
-
#
|
48
|
-
# @return [Boolean] true
|
49
|
-
# @raise [Error] Indexing to vector search DB failed
|
50
|
-
def upsert_to_vectorsearch
|
51
|
-
if previously_new_record?
|
52
|
-
self.class.class_variable_get(:@@provider).add_texts(
|
53
|
-
texts: [as_vector],
|
54
|
-
ids: [id]
|
55
|
-
)
|
56
|
-
else
|
57
|
-
self.class.class_variable_get(:@@provider).update_texts(
|
58
|
-
texts: [as_vector],
|
59
|
-
ids: [id]
|
60
|
-
)
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
# Used to serialize the DB record to an indexable vector text
|
65
|
-
# Overwrite this method in your model to customize
|
66
|
-
#
|
67
|
-
# @return [String] the text representation of the model
|
68
|
-
def as_vector
|
69
|
-
to_json
|
70
|
-
end
|
71
|
-
|
72
|
-
module ClassMethods
|
73
|
-
# Set the vector search provider
|
74
|
-
#
|
75
|
-
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
|
76
|
-
def vectorsearch(provider:)
|
77
|
-
class_variable_set(:@@provider, provider)
|
78
|
-
end
|
79
|
-
|
80
|
-
# Search for similar texts
|
81
|
-
#
|
82
|
-
# @param query [String] The query to search for
|
83
|
-
# @param k [Integer] The number of results to return
|
84
|
-
# @return [ActiveRecord::Relation] The ActiveRecord relation
|
85
|
-
def similarity_search(query, k: 1)
|
86
|
-
records = class_variable_get(:@@provider).similarity_search(
|
87
|
-
query: query,
|
88
|
-
k: k
|
89
|
-
)
|
90
|
-
|
91
|
-
# We use "__id" when Weaviate is the provider
|
92
|
-
ids = records.map { |record| record.dig("id") || record.dig("__id") }
|
93
|
-
where(id: ids)
|
94
|
-
end
|
95
|
-
|
96
|
-
# Ask a question and return the answer
|
97
|
-
#
|
98
|
-
# @param question [String] The question to ask
|
99
|
-
# @param k [Integer] The number of results to have in context
|
100
|
-
# @yield [String] Stream responses back one String at a time
|
101
|
-
# @return [String] The answer to the question
|
102
|
-
def ask(question:, k: 4, &block)
|
103
|
-
class_variable_get(:@@provider).ask(
|
104
|
-
question: question,
|
105
|
-
k: k,
|
106
|
-
&block
|
107
|
-
)
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
data/lib/langchain/railtie.rb
DELETED