langchainrb 0.6.14 → 0.6.16

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
- metadata.gz: 7beb48b4b2bc88c4a25bef1cbc9eb0e95c0bd3eaeb02af6a12e0026c9081dd6d
- data.tar.gz: ebdb7816bf0e37e221a13ecf6cb620a335d3d49b564be76f4f714cb9849ebeb6
+ metadata.gz: 36e0bec4ad6abfd9077c9e7f2d6166ba99acb7dc3859749ee6facfb9409e6379
+ data.tar.gz: 6bd8d3de4f1d31b718381fcef1c21a8b417b2bd8483d7fdc2610cfda3b60a50e
 SHA512:
- metadata.gz: 1cb2c147ffbe999eb1f027161e6cda3beea76e31b821bdec564eb36cc6a2d96e31c5d450be8d744738fabef07a9f519c8b96ab2e6dc9585fb05ceea7ebc494a2
- data.tar.gz: 9f3a1d015de4f568bea1e08637a07ed6bf2ef93bb68068ebe51a50c16ca5a1d5d3f850cf19439ad785b6078305a7dfbd740f7bf7916c1e3466efdb04060f360e
+ metadata.gz: ed7be8f193d44075f701622fd991127ab32580293fb6d1ab7ccc096eeff8704312ad34cdb7a4cfd09cf8879116ede17a5b017fe15851b9ee78cb159b7e8d8b59
+ data.tar.gz: f70d7a3707ed7fce123c2f9158c338cda3aa38a46abf5598f7d05c6ccd63d5a16a37ba10ff0a7a0a4cd17c0c2aeb2f07a07842a41f16322c48c7c9bae522dda4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
 ## [Unreleased]
 
+ ## [0.6.16] - 2023-10-02
+ - HyDE-style similarity search
+ - `Langchain::Chunker::Sentence` chunker
+ - Bug fixes
+
+ ## [0.6.15] - 2023-09-22
+ - Bump weaviate-ruby gem version
+ - Ollama support
+
 ## [0.6.14] - 2023-09-11
 - Add `find` method to `Langchain::Vectorsearch::Qdrant`
 - Enhance Google search output
data/README.md CHANGED
@@ -97,6 +97,10 @@ client.similarity_search(
 )
 ```
 ```ruby
+ # Retrieve similar documents for the query string using the [HyDE technique](https://arxiv.org/abs/2212.10496)
+ client.similarity_search_with_hyde(query: "...")
+ ```
+ ```ruby
 # Retrieve similar documents based on the embedding passed in
 client.similarity_search_by_vector(
 embedding:,
@@ -210,6 +214,18 @@ anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
 anthropic.complete(prompt: "What is the meaning of life?")
 ```
 
+ #### Ollama
+ ```ruby
+ ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+ ```
+
+ ```ruby
+ ollama.complete(prompt: "What is the meaning of life?")
+ ```
+ ```ruby
+ ollama.embed(text: "Hello world!")
+ ```
+
 ### Using Prompts 📋
 
 #### Prompt Templates
data/lib/langchain/chunker/sentence.rb ADDED
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ require "pragmatic_segmenter"
+
+ module Langchain
+ module Chunker
+ #
+ # This chunker splits text by sentences.
+ #
+ # Usage:
+ # Langchain::Chunker::Sentence.new(text).chunks
+ #
+ class Sentence < Base
+ attr_reader :text
+
+ # @param text [String]
+ # @return [Langchain::Chunker::Sentence]
+ def initialize(text)
+ @text = text
+ end
+
+ # @return [Array<String>]
+ def chunks
+ ps = PragmaticSegmenter::Segmenter.new(text: text)
+ ps.segment
+ end
+ end
+ end
+ end
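
`Langchain::Chunker::Sentence` delegates all sentence-boundary detection to the `pragmatic_segmenter` gem (added as a runtime dependency further down in this diff). A minimal usage sketch, assuming langchainrb 0.6.16 is installed; the sample text is illustrative:

```ruby
require "langchain"

text = "Ruby favors developer happiness. It was created by Matz. Many gems build on it."

# #chunks returns an Array<String>, one element per detected sentence.
Langchain::Chunker::Sentence.new(text).chunks
# => ["Ruby favors developer happiness.", "It was created by Matz.", "Many gems build on it."]
```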
data/lib/langchain/llm/ollama.rb ADDED
@@ -0,0 +1,79 @@
+ # frozen_string_literal: true
+
+ module Langchain::LLM
+ # Interface to Ollama API.
+ # Available models: https://ollama.ai/library
+ #
+ # Usage:
+ # ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+ #
+ class Ollama < Base
+ attr_reader :url
+
+ DEFAULTS = {
+ temperature: 0.0,
+ completion_model_name: "llama2",
+ embeddings_model_name: "llama2"
+ }.freeze
+
+ # Initialize the Ollama client
+ # @param url [String] The URL of the Ollama instance
+ def initialize(url:)
+ @url = url
+ end
+
+ # Generate the completion for a given prompt
+ # @param prompt [String] The prompt to complete
+ # @param model [String] The model to use
+ # @param options [Hash] The options to use (https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
+ # @return [String] The completed prompt
+ def complete(prompt:, model: nil, **options)
+ response = +""
+
+ client.post("api/generate") do |req|
+ req.body = {}
+ req.body["prompt"] = prompt
+ req.body["model"] = model || DEFAULTS[:completion_model_name]
+
+ req.body["options"] = options if options.any?
+
+ # TODO: Implement streaming support when a &block is passed in
+ req.options.on_data = proc do |chunk, size|
+ json_chunk = JSON.parse(chunk)
+
+ unless json_chunk.dig("done")
+ response << json_chunk.dig("response")
+ end
+ end
+ end
+
+ response
+ end
+
+ # Generate an embedding for a given text
+ # @param text [String] The text to generate an embedding for
+ # @param model [String] The model to use
+ # @param options [Hash] The options to use (https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values)
+ def embed(text:, model: nil, **options)
+ response = client.post("api/embeddings") do |req|
+ req.body = {}
+ req.body["prompt"] = text
+ req.body["model"] = model || DEFAULTS[:embeddings_model_name]
+
+ req.body["options"] = options if options.any?
+ end
+
+ response.body.dig("embedding")
+ end
+
+ private
+
+ def client
+ @client ||= Faraday.new(url: url) do |conn|
+ conn.request :json
+ conn.response :json
+ conn.response :raise_error
+ end
+ end
+ end
+ end
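
`complete` posts to `api/generate` and accumulates the streamed `response` fragments into a single string, while `embed` posts to `api/embeddings` and returns the `embedding` array. A short sketch against a locally running Ollama server (the URL and the extra sampling option are assumptions; any pulled model works):

```ruby
require "langchain"

# Default Ollama port; adjust to your instance.
ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

# Keyword arguments beyond prompt:/model: are forwarded as Ollama "options".
ollama.complete(prompt: "What is the meaning of life?", temperature: 0.7)

# Returns an Array of floats sized by the model's embedding dimension.
ollama.embed(text: "Hello world!")
```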
@@ -34,7 +34,7 @@ module Langchain::Prompt
 # @return [void]
 #
 def validate(template:, input_variables:)
- input_variables_set = @input_variables.uniq
+ input_variables_set = input_variables.uniq
 variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
 
 missing_variables = variables_from_template - input_variables_set
@@ -75,6 +75,7 @@ module Langchain::Prompt
 @prefix = prefix
 @suffix = suffix
 @example_separator = example_separator
+ @validate_template = validate_template
 
 validate(template: @prefix + @suffix, input_variables: @input_variables) if @validate_template
 end
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -128,6 +128,17 @@ module Langchain::Vectorsearch
 raise NotImplementedError, "#{self.class.name} does not support similarity search"
 end
 
+ # Paper: https://arxiv.org/abs/2212.10496
+ # Hypothetical Document Embeddings (HyDE)-augmented similarity search
+ #
+ # @param query [String] The query to search for
+ # @param k [Integer] The number of results to return
+ # @return [String] Response
+ def similarity_search_with_hyde(query:, k: 4)
+ hyde_completion = llm.complete(prompt: generate_hyde_prompt(question: query))
+ similarity_search(query: hyde_completion, k: k)
+ end
+
 # Method supported by Vectorsearch DB to search for similar texts in the index by the passed in vector.
 # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
 def similarity_search_by_vector(...)
@@ -142,24 +153,30 @@ module Langchain::Vectorsearch
 def_delegators :llm,
 :default_dimension
 
- def generate_prompt(question:, context:)
- prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
- prefix: "Context:",
- suffix: "---\nQuestion: {question}\n---\nAnswer:",
- example_prompt: Langchain::Prompt::PromptTemplate.new(
- template: "{context}",
- input_variables: ["context"]
- ),
- examples: [
- {context: context}
- ],
- input_variables: ["question"],
- example_separator: "\n"
+ # HyDE-style prompt
+ #
+ # @param question [String] User's question
+ # @return [String] Prompt
+ def generate_hyde_prompt(question:)
+ prompt_template = Langchain::Prompt.load_from_path(
+ # Zero-shot prompt to generate a hypothetical document based on a given question
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/hyde.yaml")
 )
-
 prompt_template.format(question: question)
 end
 
+ # Retrieval Augmented Generation (RAG)
+ #
+ # @param question [String] User's question
+ # @param context [String] The context to synthesize the answer from
+ # @return [String] Prompt
+ def generate_rag_prompt(question:, context:)
+ prompt_template = Langchain::Prompt.load_from_path(
+ file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
+ )
+ prompt_template.format(question: question, context: context)
+ end
+
 def add_data(paths:)
 raise ArgumentError, "Paths must be provided" if Array(paths).empty?
 
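Together these helpers implement the HyDE flow: `similarity_search_with_hyde` first has the LLM draft a hypothetical passage answering the query (via `hyde.yaml`), then runs an ordinary `similarity_search` over that passage. A hedged end-to-end sketch, assuming a populated Weaviate index and an OpenAI-backed client (the index name and env vars are illustrative):

```ruby
require "langchain"

client = Langchain::Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Documents",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Internally: llm.complete("Please write a passage to answer the question ...")
# produces a hypothetical document, which is then embedded and searched.
client.similarity_search_with_hyde(query: "How do vector indexes trade recall for speed?", k: 4)
```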
@@ -37,7 +37,7 @@ module Langchain::Vectorsearch
 id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
 embedding: llm.embed(text: text),
 # TODO: Add support for passing metadata
- metadata: [], # metadatas[index],
+ metadata: {}, # metadatas[index],
 document: text # Do we actually need to store the whole original document?
 )
 end
@@ -124,7 +124,7 @@ module Langchain::Vectorsearch
 
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -148,7 +148,7 @@ module Langchain::Vectorsearch
 
 context = content_data.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -144,7 +144,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -177,7 +177,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
data/lib/langchain/vectorsearch/prompts/hyde.yaml ADDED
@@ -0,0 +1,10 @@
+ # Inspiration: https://github.com/langchain-ai/langchain/blob/v0.0.254/libs/langchain/langchain/chains/hyde/prompts.py#L4-L6
+ _type: prompt
+ input_variables:
+ - question
+ template: |
+   Please write a passage to answer the question
+
+   Question: {question}
+
+   Passage:
data/lib/langchain/vectorsearch/prompts/rag.yaml ADDED
@@ -0,0 +1,11 @@
+ _type: prompt
+ input_variables:
+ - question
+ - context
+ template: |
+   Context:
+   {context}
+   ---
+   Question: {question}
+   ---
+   Answer:
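
Both files use the gem's serialized-prompt format, so they load with `Langchain::Prompt.load_from_path` and fill in with `#format`, exactly as `generate_hyde_prompt` and `generate_rag_prompt` do above. A small sketch with illustrative inputs:

```ruby
require "langchain"

prompt = Langchain::Prompt.load_from_path(
  file_path: Langchain.root.join("langchain/vectorsearch/prompts/rag.yaml")
)

puts prompt.format(
  question: "What does HyDE stand for?",
  context: "HyDE (Hypothetical Document Embeddings) searches with an LLM-generated passage."
)
# Prints roughly:
#   Context:
#   HyDE (Hypothetical Document Embeddings) searches with an LLM-generated passage.
#   ---
#   Question: What does HyDE stand for?
#   ---
#   Answer:
```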
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
@@ -134,7 +134,7 @@ module Langchain::Vectorsearch
 end
 context = context.join("\n---\n")
 
- prompt = generate_prompt(question: question, context: context)
+ prompt = generate_rag_prompt(question: question, context: context)
 
 llm.chat(prompt: prompt, &block)
 end
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
- VERSION = "0.6.14"
+ VERSION = "0.6.16"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
- version: 0.6.14
+ version: 0.6.16
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
- date: 2023-09-11 00:00:00.000000000 Z
+ date: 2023-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
 name: baran
@@ -16,14 +16,14 @@ dependencies:
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.1.8
+ version: 0.1.9
 type: :runtime
 prerelease: false
 version_requirements: !ruby/object:Gem::Requirement
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.1.8
+ version: 0.1.9
 - !ruby/object:Gem::Dependency
 name: colorize
 requirement: !ruby/object:Gem::Requirement
@@ -80,6 +80,20 @@ dependencies:
 - - '='
 - !ruby/object:Gem::Version
 version: 2.6.11
+ - !ruby/object:Gem::Dependency
+ name: pragmatic_segmenter
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.3.0
+ type: :runtime
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.3.0
 - !ruby/object:Gem::Dependency
 name: dotenv-rails
 requirement: !ruby/object:Gem::Requirement
@@ -478,14 +492,14 @@ dependencies:
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.8.6
+ version: 0.8.7
 type: :development
 prerelease: false
 version_requirements: !ruby/object:Gem::Requirement
 requirements:
 - - "~>"
 - !ruby/object:Gem::Version
- version: 0.8.6
+ version: 0.8.7
 - !ruby/object:Gem::Dependency
 name: wikipedia-client
 requirement: !ruby/object:Gem::Requirement
@@ -521,6 +535,7 @@ files:
 - lib/langchain/ai_message.rb
 - lib/langchain/chunker/base.rb
 - lib/langchain/chunker/recursive_text.rb
+ - lib/langchain/chunker/sentence.rb
 - lib/langchain/chunker/text.rb
 - lib/langchain/contextual_logger.rb
 - lib/langchain/conversation.rb
@@ -535,6 +550,7 @@ files:
 - lib/langchain/llm/google_palm.rb
 - lib/langchain/llm/hugging_face.rb
 - lib/langchain/llm/llama_cpp.rb
+ - lib/langchain/llm/ollama.rb
 - lib/langchain/llm/openai.rb
 - lib/langchain/llm/prompts/summarize_template.yaml
 - lib/langchain/llm/replicate.rb
@@ -579,6 +595,8 @@ files:
 - lib/langchain/vectorsearch/milvus.rb
 - lib/langchain/vectorsearch/pgvector.rb
 - lib/langchain/vectorsearch/pinecone.rb
+ - lib/langchain/vectorsearch/prompts/hyde.yaml
+ - lib/langchain/vectorsearch/prompts/rag.yaml
 - lib/langchain/vectorsearch/qdrant.rb
 - lib/langchain/vectorsearch/weaviate.rb
 - lib/langchain/version.rb
@@ -606,7 +624,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 - !ruby/object:Gem::Version
 version: '0'
 requirements: []
- rubygems_version: 3.2.33
+ rubygems_version: 3.3.7
 signing_key:
 specification_version: 4
 summary: Build LLM-backed Ruby applications with Ruby's LangChain