langchainrb 0.6.15 → 0.6.16

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 804ffbb08baabf8d2b0372e6893ca31a8c0933425dcabc78b2b48381b045d0c9
4
- data.tar.gz: a53ed993838ab79c343618b445533c285f35e186c3a1f4412f40f7da12b9911b
3
+ metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
4
+ data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
5
5
  SHA512:
6
- metadata.gz: d4aa19658c6c6ffdd5268c6ab83abe3ba17c3bb84b3880a6347bb67fa5c1b4bf0e9304b22c477b27401394450b692d0ee545f5745c6e3a2ec2e5e2ba50779584
7
- data.tar.gz: b1c918b8d28e86b11cde99e1b976cbffcca36dbc8ac354e08ce72d9056cc5eafd6ddb601f92edfcd28907687c2247c417477173746405cd1ca2b2ec0fc51df83
6
+ metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
7
+ data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.16] - 2023-10-02
4
+ - HyDE-style similarity search
5
+ - `Langchain::Chunker::Sentence` chunker
6
+ - Bug fixes
7
+
3
8
  ## [0.6.15] - 2023-09-22
4
9
  - Bump weaviate-ruby gem version
5
10
  - Ollama support
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
97
97
  )
98
98
  ```
99
99
  ```ruby
100
+ # Retrieve similar documents based on the query string passed in via the [HyDE technique](https://arxiv.org/abs/2212.10496)
101
+ client.similarity_search_with_hyde()
102
+ ```
103
+ ```ruby
100
104
  # Retrieve similar documents based on the embedding passed in
101
105
  client.similarity_search_by_vector(
102
106
  embedding:,
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pragmatic_segmenter"
4
+
5
+ module Langchain
6
+ module Chunker
7
+ #
8
+ # This chunker splits text by sentences.
9
+ #
10
+ # Usage:
11
+ # Langchain::Chunker::Sentence.new(text).chunks
12
+ #
13
+ class Sentence < Base
14
+ attr_reader :text
15
+
16
+ # @param text [String]
17
+ # @return [Langchain::Chunker::Sentence]
18
+ def initialize(text)
19
+ @text = text
20
+ end
21
+
22
+ # @return [Array<String>]
23
+ def chunks
24
+ ps = PragmaticSegmenter::Segmenter.new(text: text)
25
+ ps.segment
26
+ end
27
+ end
28
+ end
29
+ end
@@ -34,7 +34,7 @@ module Langchain::Prompt
34
34
  # @return [void]
35
35
  #
36
36
  def validate(template:, input_variables:)
37
- input_variables_set = @input_variables.uniq
37
+ input_variables_set = input_variables.uniq
38
38
  variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
39
39
 
40
40
  missing_variables = variables_from_template - input_variables_set
@@ -75,6 +75,7 @@ module Langchain::Prompt
75
75
  @prefix = prefix
76
76
  @suffix = suffix
77
77
  @example_separator = example_separator
78
+ @validate_template = validate_template
78
79
 
79
80
  validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
80
81
  end
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
128
128
  raise NotImplementedError, "#{self.class.name} does not support similarity search"
129
129
  end
130
130
 
131
+ # Paper: https://arxiv.org/abs/2212.10496
132
+ # Hypothetical Document Embeddings (HyDE)-augmented similarity search
133
+ #
134
+ # @param query [String] The query to search for
135
+ # @param k [Integer] The number of results to return
136
+ # @return [Object] Response from the underlying DB-specific similarity_search call
137
+ def similarity_search_with_hyde(query:, k: 4)
138
+ hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
139
+ similarity_search(query: hyde_completion, k: k)
140
+ end
141
+
131
142
  # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
132
143
  # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
133
144
  def similarity_search_by_vector(...)
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
142
153
  def_delegators :llm,
143
154
  :default_dimension
144
155
 
145
- def generate_prompt(question:, context:)
146
- prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
147
- prefix: "Context:",
148
- suffix: "---\nQuestion: {question}\n---\nAnswer:",
149
- example_prompt: Langchain::Prompt::PromptTemplate.new(
150
- template: "{context}",
151
- input_variables: ["context"]
152
- ),
153
- examples: [
154
- {context: context}
155
- ],
156
- input_variables: ["question"],
157
- example_separator: "\n"
156
+ # HyDE-style prompt
157
+ #
158
+ # @param question [String] User's question
159
+ # @return [String] Prompt
160
+ def generate_hyde_prompt(question:)
161
+ prompt_template = Langchain::Prompt.load_from_path(
162
+ # Zero-shot prompt to generate a hypothetical document based on a given question
163
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
158
164
  )
159
-
160
165
  prompt_template.format(question: question)
161
166
  end
162
167
 
168
+ # Retrieval Augmented Generation (RAG)
169
+ #
170
+ # @param question [String] User's question
171
+ # @param context [String] The context to synthesize the answer from
172
+ # @return [String] Prompt
173
+ def generate_rag_prompt(question:, context:)
174
+ prompt_template = Langchain::Prompt.load_from_path(
175
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
176
+ )
177
+ prompt_template.format(question: question, context: context)
178
+ end
179
+
163
180
  def add_data(paths:)
164
181
  raise ArgumentError, "Paths must be provided" if Array(paths).empty?
165
182
 
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
37
37
  id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
38
38
  embedding: llm.embed(text: text),
39
39
  # TODO: Add support for passing metadata
40
- metadata: [], # metadatas[index],
40
+ metadata: {}, # metadatas[index],
41
41
  document: text # Do we actually need to store the whole original document?
42
42
  )
43
43
  end
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
124
124
 
125
125
  context = context.join("\n---\n")
126
126
 
127
- prompt = generate_prompt(question: question, context: context)
127
+ prompt = generate_rag_prompt(question: question, context: context)
128
128
 
129
129
  llm.chat(prompt: prompt, &block)
130
130
  end
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
148
148
 
149
149
  context = content_data.join("\n---\n")
150
150
 
151
- prompt = generate_prompt(question: question, context: context)
151
+ prompt = generate_rag_prompt(question: question, context: context)
152
152
 
153
153
  llm.chat(prompt: prompt, &block)
154
154
  end
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
144
144
  end
145
145
  context = context.join("\n---\n")
146
146
 
147
- prompt = generate_prompt(question: question, context: context)
147
+ prompt = generate_rag_prompt(question: question, context: context)
148
148
 
149
149
  llm.chat(prompt: prompt, &block)
150
150
  end
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
177
177
  end
178
178
  context = context.join("\n---\n")
179
179
 
180
- prompt = generate_prompt(question: question, context: context)
180
+ prompt = generate_rag_prompt(question: question, context: context)
181
181
 
182
182
  llm.chat(prompt: prompt, &block)
183
183
  end
@@ -0,0 +1,10 @@
1
+ # Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
2
+ _type: prompt
3
+ input_variables:
4
+ - question
5
+ template: |
6
+ Please write a passage to answer the question
7
+
8
+ Question: {question}
9
+
10
+ Passage:
@@ -0,0 +1,11 @@
1
+ _type: prompt
2
+ input_variables:
3
+ - question
4
+ - context
5
+ template: |
6
+ Context:
7
+ {context}
8
+ ---
9
+ Question: {question}
10
+ ---
11
+ Answer:
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
134
134
  end
135
135
  context = context.join("\n---\n")
136
136
 
137
- prompt = generate_prompt(question: question, context: context)
137
+ prompt = generate_rag_prompt(question: question, context: context)
138
138
 
139
139
  llm.chat(prompt: prompt, &block)
140
140
  end
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
134
134
  end
135
135
  context = context.join("\n---\n")
136
136
 
137
- prompt = generate_prompt(question: question, context: context)
137
+ prompt = generate_rag_prompt(question: question, context: context)
138
138
 
139
139
  llm.chat(prompt: prompt, &block)
140
140
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.15"
4
+ VERSION = "0.6.16"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.15
4
+ version: 0.6.16
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-09-22 00:00:00.000000000 Z
11
+ date: 2023-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: 0.1.8
19
+ version: 0.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: 0.1.8
26
+ version: 0.1.9
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: colorize
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - '='
81
81
  - !ruby/object:Gem::Version
82
82
  version: 2.6.11
83
+ - !ruby/object:Gem::Dependency
84
+ name: pragmatic_segmenter
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.3.0
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.3.0
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: dotenv-rails
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
521
535
  - lib/langchain/ai_message.rb
522
536
  - lib/langchain/chunker/base.rb
523
537
  - lib/langchain/chunker/recursive_text.rb
538
+ - lib/langchain/chunker/sentence.rb
524
539
  - lib/langchain/chunker/text.rb
525
540
  - lib/langchain/contextual_logger.rb
526
541
  - lib/langchain/conversation.rb
@@ -572,7 +587,6 @@ files:
572
587
  - lib/langchain/utils/token_length/base_validator.rb
573
588
  - lib/langchain/utils/token_length/cohere_validator.rb
574
589
  - lib/langchain/utils/token_length/google_palm_validator.rb
575
- - lib/langchain/utils/token_length/ollama_validator.rb
576
590
  - lib/langchain/utils/token_length/openai_validator.rb
577
591
  - lib/langchain/utils/token_length/token_limit_exceeded.rb
578
592
  - lib/langchain/vectorsearch/base.rb
@@ -581,6 +595,8 @@ files:
581
595
  - lib/langchain/vectorsearch/milvus.rb
582
596
  - lib/langchain/vectorsearch/pgvector.rb
583
597
  - lib/langchain/vectorsearch/pinecone.rb
598
+ - lib/langchain/vectorsearch/prompts/hyde.yaml
599
+ - lib/langchain/vectorsearch/prompts/rag.yaml
584
600
  - lib/langchain/vectorsearch/qdrant.rb
585
601
  - lib/langchain/vectorsearch/weaviate.rb
586
602
  - lib/langchain/version.rb
@@ -1,16 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "tiktoken_ruby"
4
-
5
- module Langchain
6
- module Utils
7
- module TokenLength
8
- #
9
- # This class is meant to validate the length of the text passed in to Ollama.
10
- # It is used to validate the token length before the API call is made
11
- #
12
- class OllamaValidator < BaseValidator
13
- end
14
- end
15
- end
16
- end