langchainrb 0.6.15 → 0.6.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 804ffbb08baabf8d2b0372e6893ca31a8c0933425dcabc78b2b48381b045d0c9
-  data.tar.gz: a53ed993838ab79c343618b445533c285f35e186c3a1f4412f40f7da12b9911b
+  metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
+  data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
 SHA512:
-  metadata.gz: d4aa19658c6c6ffdd5268c6ab83abe3ba17c3bb84b3880a6347bb67fa5c1b4bf0e9304b22c477b27401394450b692d0ee545f5745c6e3a2ec2e5e2ba50779584
-  data.tar.gz: b1c918b8d28e86b11cde99e1b976cbffcca36dbc8ac354e08ce72d9056cc5eafd6ddb601f92edfcd28907687c2247c417477173746405cd1ca2b2ec0fc51df83
+  metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
+  data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
 ## [Unreleased]
 
+## [0.6.16] - 2023-10-02
+- HyDE-style similarity search
+- `Langchain::Chunker::Sentence` chunker
+- Bug fixes
+
 ## [0.6.15] - 2023-09-22
 - Bump weaviate-ruby gem version
 - Ollama support
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
 )
 ```
 ```ruby
+# Retrieve similar documents based on the query string passed in via the [HyDE technique](https://arxiv.org/abs/2212.10496)
+client.similarity_search_with_hyde()
+```
+```ruby
 # Retrieve similar documents based on the embedding passed in
 client.similarity_search_by_vector(
   embedding:,
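The README snippet above elides the arguments. Going by the `similarity_search_with_hyde(query:, k: 4)` signature added in `Langchain::Vectorsearch::Base` (see the hunk further down), a fuller call might look like this sketch; the Weaviate setup and index name are illustrative, not part of this diff:

```ruby
client = Langchain::Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Recipes",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# HyDE: the LLM first drafts a hypothetical answer passage, and that passage
# (rather than the raw query) is embedded and used for the similarity search.
client.similarity_search_with_hyde(query: "quick weeknight pasta dishes", k: 4)
```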
data/lib/langchain/chunker/sentence.rb ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require "pragmatic_segmenter"
+
+module Langchain
+  module Chunker
+    #
+    # This chunker splits text by sentences.
+    #
+    # Usage:
+    #     Langchain::Chunker::Sentence.new(text).chunks
+    #
+    class Sentence < Base
+      attr_reader :text
+
+      # @param text [String]
+      # @return [Langchain::Chunker::Sentence]
+      def initialize(text)
+        @text = text
+      end
+
+      # @return [Array<String>]
+      def chunks
+        ps = PragmaticSegmenter::Segmenter.new(text: text)
+        ps.segment
+      end
+    end
+  end
+end
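A minimal usage sketch for the new chunker (the sample text is invented; output shape per the pragmatic_segmenter gem):

```ruby
require "langchain"

text = "Ruby favors programmer happiness. It appeared in 1995! Still fun? Yes."

# PragmaticSegmenter handles abbreviations and punctuation edge cases,
# so this is more robust than a naive split on ". ".
Langchain::Chunker::Sentence.new(text).chunks
# => ["Ruby favors programmer happiness.", "It appeared in 1995!", "Still fun?", "Yes."]
```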
@@ -34,7 +34,7 @@ module Langchain::Prompt
     # @return [void]
     #
     def validate(template:, input_variables:)
-      input_variables_set = @input_variables.uniq
+      input_variables_set = input_variables.uniq
       variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
 
       missing_variables = variables_from_template - input_variables_set
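The one-character fix above matters: `validate` previously read the `@input_variables` instance variable instead of its own `input_variables:` parameter, so callers passing an explicit variable list were silently checked against the wrong set. A plain-Ruby sketch of the comparison it performs (values hypothetical):

```ruby
# What validate now effectively does, given explicit arguments:
template = "Tell me a {adjective} joke about {content}."
input_variables = ["adjective"] # "content" is missing

variables_from_template = ["adjective", "content"] # extracted from the template
missing = variables_from_template - input_variables.uniq # => ["content"]
extra   = input_variables.uniq - variables_from_template # => []
# Any non-empty difference fails validation.
```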
@@ -75,6 +75,7 @@ module Langchain::Prompt
       @prefix = prefix
       @suffix = suffix
       @example_separator = example_separator
+      @validate_template = validate_template
 
       validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
     end
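This is the companion bug fix: `FewShotPromptTemplate#initialize` accepted a `validate_template:` flag but never assigned it, so the guard `if @validate_template` was always nil and validation never ran. With the assignment in place, a construction like this sketch (modeled on the library's few-shot example) is actually checked:

```ruby
Langchain::Prompt::FewShotPromptTemplate.new(
  prefix: "Write antonyms for the following words.",
  suffix: "Input: {adjective}\nOutput:",
  example_prompt: Langchain::Prompt::PromptTemplate.new(
    template: "Input: {input}\nOutput: {output}",
    input_variables: ["input", "output"]
  ),
  examples: [{input: "happy", output: "sad"}],
  input_variables: ["adjective"],
  validate_template: true # previously ignored; now triggers validate(prefix + suffix)
)
```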
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
       raise NotImplementedError, "#{self.class.name} does not support similarity search"
     end
 
+    # Paper: https://arxiv.org/abs/2212.10496
+    # Hypothetical Document Embeddings (HyDE)-augmented similarity search
+    #
+    # @param query [String] The query to search for
+    # @param k [Integer] The number of results to return
+    # @return [String] Response
+    def similarity_search_with_hyde(query:, k: 4)
+      hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
+      similarity_search(query: hyde_completion, k: k)
+    end
+
     # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
     # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
     def similarity_search_by_vector(...)
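`similarity_search_with_hyde` is a two-step pipeline. Roughly equivalent manual steps, as a sketch (assumes the adapter exposes its `llm` reader and that `complete` returns the completion text, as in 0.6.x; `generate_hyde_prompt` is an internal helper, hence `send`):

```ruby
# Step 1: ask the LLM to invent a passage that would answer the query.
passage = client.llm.complete(
  prompt: client.send(:generate_hyde_prompt, question: "How do I store embeddings?")
)

# Step 2: search with the hypothetical passage instead of the raw query;
# a fake answer tends to embed closer to real answers than the question does.
client.similarity_search(query: passage, k: 4)
```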
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
     def_delegators :llm,
       :default_dimension
 
-    def generate_prompt(question:, context:)
-      prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
-        prefix: "Context:",
-        suffix: "---\nQuestion: {question}\n---\nAnswer:",
-        example_prompt: Langchain::Prompt::PromptTemplate.new(
-          template: "{context}",
-          input_variables: ["context"]
-        ),
-        examples: [
-          {context: context}
-        ],
-        input_variables: ["question"],
-        example_separator: "\n"
+    # HyDE-style prompt
+    #
+    # @param [String] User's question
+    # @return [String] Prompt
+    def generate_hyde_prompt(question:)
+      prompt_template = Langchain::Prompt.load_from_path(
+        # Zero-shot prompt to generate a hypothetical document based on a given question
+        file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
       )
-
       prompt_template.format(question: question)
     end
 
+    # Retrieval Augmented Generation (RAG)
+    #
+    # @param question [String] User's question
+    # @param context [String] The context to synthesize the answer from
+    # @return [String] Prompt
+    def generate_rag_prompt(question:, context:)
+      prompt_template = Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
+      )
+      prompt_template.format(question: question, context: context)
+    end
+
     def add_data(paths:)
       raise ArgumentError, "Paths must be provided" if Array(paths).empty?
 
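Both helpers now load their templates from YAML via `Langchain::Prompt.load_from_path` instead of building a `FewShotPromptTemplate` in code. A sketch of what `generate_rag_prompt` evaluates to (question and context invented; output shown approximately):

```ruby
prompt_template = Langchain::Prompt.load_from_path(
  file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
)
prompt_template.format(
  question: "What is HyDE?",
  context: "HyDE embeds a hypothetical answer instead of the query."
)
# => "Context:\nHyDE embeds a hypothetical answer instead of the query.\n---\nQuestion: What is HyDE?\n---\nAnswer:"
```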
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
         id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
         embedding: llm.embed(text: text),
         # TODO: Add support for passing metadata
-        metadata: [], # metadatas[index],
+        metadata: {}, # metadatas[index],
         document: text # Do we actually need to store the whole original document?
       )
     end
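Chroma metadata is a per-document key/value map, so the empty placeholder should be a Hash, not an Array. The object being built here, in isolation (values illustrative; `Chroma::Resources::Embedding` and its keywords are taken from the surrounding hunk):

```ruby
Chroma::Resources::Embedding.new(
  id: SecureRandom.uuid,
  embedding: [0.1, -0.2, 0.3], # vector produced by llm.embed(text: text)
  metadata: {},                # was []; Chroma expects key/value pairs
  document: "the original text"
)
```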
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
 
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
 
       context = content_data.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/prompts/hyde.yaml ADDED
@@ -0,0 +1,10 @@
+# Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
+_type: prompt
+input_variables:
+  - question
+template: |
+  Please write a passage to answer the question
+
+  Question: {question}
+
+  Passage:
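Loading this file yields an ordinary `PromptTemplate`; formatting it fills in `{question}` (a sketch, question invented):

```ruby
hyde = Langchain::Prompt.load_from_path(
  file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
)
puts hyde.format(question: "How should I tune pgvector indexes?")
# Please write a passage to answer the question
#
# Question: How should I tune pgvector indexes?
#
# Passage:
```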
data/lib/langchain/vectorsearch/prompts/rag.yaml ADDED
@@ -0,0 +1,11 @@
+_type: prompt
+input_variables:
+  - question
+  - context
+template: |
+  Context:
+  {context}
+  ---
+  Question: {question}
+  ---
+  Answer:
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.6.15"
+  VERSION = "0.6.16"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.15
+  version: 0.6.16
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-22 00:00:00.000000000 Z
+date: 2023-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -16,14 +16,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.8
+        version: 0.1.9
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: 0.1.8
+        version: 0.1.9
 - !ruby/object:Gem::Dependency
   name: colorize
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
   - - '='
     - !ruby/object:Gem::Version
       version: 2.6.11
+- !ruby/object:Gem::Dependency
+  name: pragmatic_segmenter
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
   requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
 - lib/langchain/ai_message.rb
 - lib/langchain/chunker/base.rb
 - lib/langchain/chunker/recursive_text.rb
+- lib/langchain/chunker/sentence.rb
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
@@ -572,7 +587,6 @@ files:
 - lib/langchain/utils/token_length/base_validator.rb
 - lib/langchain/utils/token_length/cohere_validator.rb
 - lib/langchain/utils/token_length/google_palm_validator.rb
-- lib/langchain/utils/token_length/ollama_validator.rb
 - lib/langchain/utils/token_length/openai_validator.rb
 - lib/langchain/utils/token_length/token_limit_exceeded.rb
 - lib/langchain/vectorsearch/base.rb
@@ -581,6 +595,8 @@ files:
 - lib/langchain/vectorsearch/milvus.rb
 - lib/langchain/vectorsearch/pgvector.rb
 - lib/langchain/vectorsearch/pinecone.rb
+- lib/langchain/vectorsearch/prompts/hyde.yaml
+- lib/langchain/vectorsearch/prompts/rag.yaml
 - lib/langchain/vectorsearch/qdrant.rb
 - lib/langchain/vectorsearch/weaviate.rb
 - lib/langchain/version.rb
data/lib/langchain/utils/token_length/ollama_validator.rb DELETED
@@ -1,16 +0,0 @@
-# frozen_string_literal: true
-
-require "tiktoken_ruby"
-
-module Langchain
-  module Utils
-    module TokenLength
-      #
-      # This class is meant to validate the length of the text passed in to Ollama.
-      # It is used to validate the token length before the API call is made
-      #
-      class OllamaValidator < BaseValidator
-      end
-    end
-  end
-end