langchainrb 0.6.14 → 0.6.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 7beb48b4b2bc88c4a25bef1cbc9eb0e95c0bd3eaeb02af6a12e0026c9081dd6d
-  data.tar.gz: ebdb7816bf0e37e221a13ecf6cb620a335d3d49b564be76f4f714cb9849ebeb6
+  metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
+  data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
 SHA512:
-  metadata.gz: 1cb2c147ffbe999eb1f027161e6cda3beea76e31b821bdec564eb36cc6a2d96e31c5d450be8d744738fabef07a9f519c8b96ab2e6dc9585fb05ceea7ebc494a2
-  data.tar.gz: 9f3a1d015de4f568bea1e08637a07ed6bf2ef93bb68068ebe51a50c16ca5a1d5d3f850cf19439ad785b6078305a7dfbd740f7bf7916c1e3466efdb04060f360e
+  metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
+  data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
 ## [Unreleased]
 
+## [0.6.16] - 2023-10-02
+- HyDE-style similarity search
+- `Langchain::Chunker::Sentence` chunker
+- Bug fixes
+
+## [0.6.15] - 2023-09-22
+- Bump weaviate-ruby gem version
+- Ollama support
+
 ## [0.6.14] - 2023-09-11
 - Add `find` method to `Langchain::Vectorsearch::Qdrant`
 - Enhance Google search output
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
 )
 ```
 ```ruby
+# Retrieve similar documents based on the query string passed in via the [HyDE technique](https://arxiv.org/abs/2212.10496)
+client.similarity_search_with_hyde(query:)
+```
+```ruby
 # Retrieve similar documents based on the embedding passed in
 client.similarity_search_by_vector(
     embedding:,
@@ -210,6 +214,18 @@ anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
 anthropic.complete(prompt: "What is the meaning of life?")
 ```
 
+#### Ollama
+```ruby
+ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+```
+
+```ruby
+ollama.complete(prompt: "What is the meaning of life?")
+```
+```ruby
+ollama.embed(text: "Hello world!")
+```
+
 ### Using Prompts 📋
 
 #### Prompt Templates
data/lib/langchain/chunker/sentence.rb ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+require "pragmatic_segmenter"
+
+module Langchain
+  module Chunker
+    #
+    # This chunker splits text by sentences.
+    #
+    # Usage:
+    #   Langchain::Chunker::Sentence.new(text).chunks
+    #
+    class Sentence < Base
+      attr_reader :text
+
+      # @param text [String]
+      # @return [Langchain::Chunker::Sentence]
+      def initialize(text)
+        @text = text
+      end
+
+      # @return [Array<String>]
+      def chunks
+        ps = PragmaticSegmenter::Segmenter.new(text: text)
+        ps.segment
+      end
+    end
+  end
+end
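A minimal usage sketch of the new chunker (the sample text and output below are illustrative):

```ruby
require "langchain"

chunker = Langchain::Chunker::Sentence.new("Hello world. How are you? Fine, thanks.")
chunker.chunks
# => ["Hello world.", "How are you?", "Fine, thanks."]
```

Sentence boundary detection is delegated to the pragmatic_segmenter gem (added as a runtime dependency further down in this diff), so abbreviations and other punctuation edge cases are handled by that gem rather than by a naive split on periods.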
data/lib/langchain/llm/ollama.rb ADDED
@@ -0,0 +1,79 @@
+# frozen_string_literal: true
+
+module Langchain::LLM
+  # Interface to Ollama API.
+  # Available models: https://ollama.ai/library
+  #
+  # Usage:
+  #   ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+  #
+  class Ollama < Base
+    attr_reader :url
+
+    DEFAULTS = {
+      temperature: 0.0,
+      completion_model_name: "llama2",
+      embeddings_model_name: "llama2"
+    }.freeze
+
+    # Initialize the Ollama client
+    # @param url [String] The URL of the Ollama instance
+    def initialize(url:)
+      @url = url
+    end
+
+    # Generate the completion for a given prompt
+    # @param prompt [String] The prompt to complete
+    # @param model [String] The model to use
+    # @param options [Hash] The options to use (https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
+    # @return [String] The completed prompt
+    def complete(prompt:, model: nil, **options)
+      response = +""
+
+      client.post("api/generate") do |req|
+        req.body = {}
+        req.body["prompt"] = prompt
+        req.body["model"] = model || DEFAULTS[:completion_model_name]
+
+        req.body["options"] = options if options.any?
+
+        # TODO: Implement streaming support when a &block is passed in
+        req.options.on_data = proc do |chunk, size|
+          json_chunk = JSON.parse(chunk)
+
+          unless json_chunk.dig("done")
+            response << json_chunk.dig("response")
+          end
+        end
+      end
+
+      response
+    end
+
+    # Generate an embedding for a given text
+    # @param text [String] The text to generate an embedding for
+    # @param model [String] The model to use
+    # @param options [Hash] The options to use
+    def embed(text:, model: nil, **options)
+      response = client.post("api/embeddings") do |req|
+        req.body = {}
+        req.body["prompt"] = text
+        req.body["model"] = model || DEFAULTS[:embeddings_model_name]
+
+        req.body["options"] = options if options.any?
+      end
+
+      response.body.dig("embedding")
+    end
+
+    private
+
+    def client
+      @client ||= Faraday.new(url: url) do |conn|
+        conn.request :json
+        conn.response :json
+        conn.response :raise_error
+      end
+    end
+  end
+end
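A minimal sketch of calling the new client (the URL below is Ollama's default local endpoint; extra keyword arguments flow into the request's options hash, per the method bodies above):

```ruby
ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

# Completion; temperature is forwarded via **options into req.body["options"]
ollama.complete(prompt: "Why is the sky blue?", temperature: 0.7)

# Embedding; returns the "embedding" array from the JSON response
ollama.embed(text: "Hello world!")
```

Note that `complete` accumulates streamed chunks through Faraday's `on_data` callback but still returns a single string; block-based streaming is left as a TODO in the source.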
data/lib/langchain/prompt/base.rb CHANGED
@@ -34,7 +34,7 @@ module Langchain::Prompt
     # @return [void]
     #
     def validate(template:, input_variables:)
-      input_variables_set = @input_variables.uniq
+      input_variables_set = input_variables.uniq
       variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
 
       missing_variables = variables_from_template - input_variables_set
data/lib/langchain/prompt/few_shot_prompt_template.rb CHANGED
@@ -75,6 +75,7 @@ module Langchain::Prompt
       @prefix = prefix
       @suffix = suffix
       @example_separator = example_separator
+      @validate_template = validate_template
 
       validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
     end
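This one-line addition is the actual bug fix: without it, `@validate_template` was never assigned, so the `validate` guard on the next line could never fire. A hedged sketch of the behavior the fix restores, assuming `validate_template:` is an accepted keyword (the other arguments mirror the old `generate_prompt` code further down in this diff):

```ruby
# "question" appears in the suffix but is not declared, so this should now raise
Langchain::Prompt::FewShotPromptTemplate.new(
  prefix: "Context:",
  suffix: "---\nQuestion: {question}\n---\nAnswer:",
  example_prompt: Langchain::Prompt::PromptTemplate.new(
    template: "{context}", input_variables: ["context"]
  ),
  examples: [{context: "..."}],
  input_variables: [], # missing "question"
  validate_template: true
)
```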
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
       raise NotImplementedError, "#{self.class.name} does not support similarity search"
     end
 
+    # Paper: https://arxiv.org/abs/2212.10496
+    # Hypothetical Document Embeddings (HyDE)-augmented similarity search
+    #
+    # @param query [String] The query to search for
+    # @param k [Integer] The number of results to return
+    # @return [String] Response
+    def similarity_search_with_hyde(query:, k: 4)
+      hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
+      similarity_search(query: hyde_completion, k: k)
+    end
+
     # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
     # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
     def similarity_search_by_vector(...)
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
     def_delegators :llm,
       :default_dimension
 
-    def generate_prompt(question:, context:)
-      prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
-        prefix: "Context:",
-        suffix: "---\nQuestion: {question}\n---\nAnswer:",
-        example_prompt: Langchain::Prompt::PromptTemplate.new(
-          template: "{context}",
-          input_variables: ["context"]
-        ),
-        examples: [
-          {context: context}
-        ],
-        input_variables: ["question"],
-        example_separator: "\n"
+    # HyDE-style prompt
+    #
+    # @param [String] User's question
+    # @return [String] Prompt
+    def generate_hyde_prompt(question:)
+      prompt_template = Langchain::Prompt.load_from_path(
+        # Zero-shot prompt to generate a hypothetical document based on a given question
+        file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
       )
-
       prompt_template.format(question: question)
     end
 
+    # Retrieval Augmented Generation (RAG)
+    #
+    # @param question [String] User's question
+    # @param context [String] The context to synthesize the answer from
+    # @return [String] Prompt
+    def generate_rag_prompt(question:, context:)
+      prompt_template = Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
+      )
+      prompt_template.format(question: question, context: context)
+    end
+
     def add_data(paths:)
       raise ArgumentError, "Paths must be provided" if Array(paths).empty?
 
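Read together, the two hunks implement HyDE in two steps: the LLM drafts a hypothetical answer from the hyde.yaml prompt, and the ordinary similarity search then runs on that draft rather than on the raw question. A sketch of the flow, assuming `client` is any Vectorsearch adapter configured with an LLM:

```ruby
client.similarity_search_with_hyde(query: "How do I rotate API keys?", k: 4)
# 1. llm.complete renders prompts/hyde.yaml with the question and drafts a passage
# 2. similarity_search embeds that passage and returns the k nearest documents
```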
data/lib/langchain/vectorsearch/chroma.rb CHANGED
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
         id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
         embedding: llm.embed(text: text),
         # TODO: Add support for passing metadata
-        metadata: [], # metadatas[index],
+        metadata: {}, # metadatas[index],
         document: text # Do we actually need to store the whole original document?
       )
     end
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
 
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/milvus.rb CHANGED
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
 
       context = content_data.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/pinecone.rb CHANGED
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/prompts/hyde.yaml ADDED
@@ -0,0 +1,10 @@
+# Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
+_type: prompt
+input_variables:
+- question
+template: |
+  Please write a passage to answer the question
+
+  Question: {question}
+
+  Passage:
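How this file is consumed, mirroring `generate_hyde_prompt` above (the question string and rendered output are illustrative):

```ruby
prompt = Langchain::Prompt.load_from_path(
  file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
)
prompt.format(question: "What is HyDE retrieval?")
# => "Please write a passage to answer the question\n\nQuestion: What is HyDE retrieval?\n\nPassage:"
```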
data/lib/langchain/vectorsearch/prompts/rag.yaml ADDED
@@ -0,0 +1,11 @@
+_type: prompt
+input_variables:
+- question
+- context
+template: |
+  Context:
+  {context}
+  ---
+  Question: {question}
+  ---
+  Answer:
data/lib/langchain/vectorsearch/qdrant.rb CHANGED
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/vectorsearch/weaviate.rb CHANGED
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
       end
       context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+      prompt = generate_rag_prompt(question: question, context: context)
 
       llm.chat(prompt: prompt, &block)
     end
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.6.14"
+  VERSION = "0.6.16"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.14
+  version: 0.6.16
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-09-11 00:00:00.000000000 Z
+date: 2023-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -16,14 +16,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.8
+        version: 0.1.9
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.1.8
+        version: 0.1.9
 - !ruby/object:Gem::Dependency
   name: colorize
   requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
     - - '='
       - !ruby/object:Gem::Version
         version: 2.6.11
+- !ruby/object:Gem::Dependency
+  name: pragmatic_segmenter
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.3.0
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
   requirement: !ruby/object:Gem::Requirement
@@ -478,14 +492,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.6
+        version: 0.8.7
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.6
+        version: 0.8.7
 - !ruby/object:Gem::Dependency
   name: wikipedia-client
   requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
 - lib/langchain/ai_message.rb
 - lib/langchain/chunker/base.rb
 - lib/langchain/chunker/recursive_text.rb
+- lib/langchain/chunker/sentence.rb
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
@@ -535,6 +550,7 @@ files:
 - lib/langchain/llm/google_palm.rb
 - lib/langchain/llm/hugging_face.rb
 - lib/langchain/llm/llama_cpp.rb
+- lib/langchain/llm/ollama.rb
 - lib/langchain/llm/openai.rb
 - lib/langchain/llm/prompts/summarize_template.yaml
 - lib/langchain/llm/replicate.rb
@@ -579,6 +595,8 @@ files:
 - lib/langchain/vectorsearch/milvus.rb
 - lib/langchain/vectorsearch/pgvector.rb
 - lib/langchain/vectorsearch/pinecone.rb
+- lib/langchain/vectorsearch/prompts/hyde.yaml
+- lib/langchain/vectorsearch/prompts/rag.yaml
 - lib/langchain/vectorsearch/qdrant.rb
 - lib/langchain/vectorsearch/weaviate.rb
 - lib/langchain/version.rb
@@ -606,7 +624,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.33
+rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Build LLM-backed Ruby applications with Ruby's LangChain