langchainrb 0.3.12 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
-  data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
+  metadata.gz: 33c9436ac8d6a73dc06d30f63c11e4f246b3705aa8934765a53ee59325c3a9cd
+  data.tar.gz: 9cc85603694f9367dd162e25379029a345aa0b5c88cccf303c2af114d43a4010
 SHA512:
-  metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
-  data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4
+  metadata.gz: ca5e81638625939d11999a64d44c92fc57c762a934aa8fd5b110c3f5aacc9a736ab5f02da4366e7a1b9b9ec0335dd1eb1683f5b9d90bd97c81914ea0a698dc7c
+  data.tar.gz: aff49ef9451bcbc9a97d181757a5b913737cbfbb4fc3ca49d423cbd2e59a4a71091816e98c2996ff7f8292cb6e0c0d69931a4f3e4e36e2a69fdd7f745640e266
data/.env.example CHANGED
@@ -11,4 +11,5 @@ QDRANT_API_KEY=
 QDRANT_URL=
 SERPAPI_API_KEY=
 WEAVIATE_API_KEY=
-WEAVIATE_URL=
+WEAVIATE_URL=
+POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
 ## [Unreleased]
 
+## [0.3.14] - 2023-05-28
+- 🔍 Vectorsearch
+  - Not relying on Weaviate modules anymore
+  - Adding missing specs for Qdrant and Milvus classes
+- 🚚 Loaders
+  - Add Langchain::Data result object for data loaders
+- 🗣️ LLMs
+  - Add `summarize()` method to the LLMs
+
+## [0.3.13] - 2023-05-26
+- 🔍 Vectorsearch
+  - Pgvector support
+- 🚚 Loaders
+  - CSV loader
+  - JSON loader
+  - JSONL loader
+
 ## [0.3.12] - 2023-05-25
 - 🔍 Vectorsearch
   - Introduce namespace support for Pinecone
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.3.12)
+    langchainrb (0.3.14)
 
 GEM
   remote: https://rubygems.org/
@@ -148,9 +148,13 @@ GEM
     milvus (0.9.1)
       faraday (~> 1)
     mini_mime (1.1.2)
+    mini_portile2 (2.8.2)
     minitest (5.18.0)
     multi_xml (0.6.0)
     multipart-post (2.3.0)
+    nokogiri (1.14.3)
+      mini_portile2 (~> 2.8.0)
+      racc (~> 1.4)
     nokogiri (1.14.3-arm64-darwin)
       racc (~> 1.4)
     nokogiri (1.14.3-x86_64-darwin)
@@ -166,6 +170,8 @@ GEM
       hashery (~> 2.0)
       ruby-rc4
       ttfunk
+    pg (1.5.3)
+    pgvector (0.1.1)
     pinecone (0.1.71)
       dry-struct (~> 1.6.0)
       dry-validation (~> 1.10.0)
@@ -273,6 +279,7 @@ GEM
 PLATFORMS
   arm64-darwin-21
   arm64-darwin-22
+  ruby
   x86_64-darwin-19
   x86_64-darwin-22
   x86_64-linux
@@ -290,6 +297,8 @@ DEPENDENCIES
   milvus (~> 0.9.0)
   nokogiri (~> 1.13)
   pdf-reader (~> 1.4)
+  pg (~> 1.5)
+  pgvector (< 0.2)
   pinecone (~> 0.1.6)
   pry-byebug (~> 3.10.0)
   qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -284,12 +284,16 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
 
 ##### Supported Formats
 
-| Format | Pocessor         | Gem Requirements             |
-| ------ | ---------------- | :--------------------------: |
-| docx   | Processors::Docx | `gem "docx", "~> 0.8.0"`     |
-| html   | Processors::HTML | `gem "nokogiri", "~> 1.13"`  |
-| pdf    | Processors::PDF  | `gem "pdf-reader", "~> 1.4"` |
-| text   | Processors::Text |                              |
+
+| Format | Pocessor                     | Gem Requirements             |
+| ------ | ---------------------------- | :--------------------------: |
+| docx   | Langchain::Processors::Docx  | `gem "docx", "~> 0.8.0"`     |
+| html   | Langchain::Processors::HTML  | `gem "nokogiri", "~> 1.13"`  |
+| pdf    | Langchain::Processors::PDF   | `gem "pdf-reader", "~> 1.4"` |
+| text   | Langchain::Processors::Text  |                              |
+| JSON   | Langchain::Processors::JSON  |                              |
+| JSONL  | Langchain::Processors::JSONL |                              |
+| csv    | Langchain::Processors::CSV   |                              |
 
 ## Examples
 Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -317,6 +321,7 @@ Langchain.logger.level = :info
 [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
 [<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
 [<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
+[<img style="border-radius:50%" alt="Alex Chaplinsky" src="https://avatars.githubusercontent.com/u/695947?v=4" width="80" height="80" class="avatar">](https://github.com/alchaplinsky)
 
 (Criteria for becoming an Honorary Contributor or Core Contributor is pending...)
 
data/lib/langchain/data.rb ADDED
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+module Langchain
+  class Data
+    attr_reader :source
+
+    def initialize(data, options = {})
+      @source = options[:source]
+      @data = data
+    end
+
+    def value
+      @data
+    end
+  end
+end
data/lib/langchain/loader.rb CHANGED
@@ -58,11 +58,12 @@ module Langchain
     end
 
     def process(&block)
-      data, processor = yield
+      raw_data, kind = yield
 
-      raise UnknownFormatError unless processor
+      raise UnknownFormatError unless kind
 
-      Langchain::Processors.const_get(processor).new.parse(data)
+      processor = Langchain::Processors.const_get(kind).new
+      Langchain::Data.new(processor.parse(raw_data), source: @path)
     end
 
     def find_processor(constant, value)
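Taken together with the new Langchain::Data class above, this change means `Langchain::Loader#load` now returns a wrapper object instead of the raw parsed content. A minimal usage sketch (the file path is illustrative):

    data = Langchain::Loader.new("example.pdf").load
    data.source # => "example.pdf" (the path the loader was given)
    data.value  # => the parsed content returned by the matching processor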
data/lib/langchain/processors/csv.rb ADDED
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+require "csv"
+
+module Langchain
+  module Processors
+    class CSV < Base
+      EXTENSIONS = [".csv"]
+      CONTENT_TYPES = ["text/csv"]
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array of Hash]
+      def parse(data)
+        ::CSV.new(data.read).map do |row|
+          row.map(&:strip)
+        end
+      end
+    end
+  end
+end
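For reference, the new processor can be exercised directly; a minimal sketch, assuming a local sample.csv (the filename is illustrative):

    File.open("sample.csv") do |file|
      # Each row comes back as an array of whitespace-stripped cell values
      Langchain::Processors::CSV.new.parse(file)
    end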
data/lib/langchain/processors/json.rb ADDED
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class JSON < Base
+      EXTENSIONS = [".json"]
+      CONTENT_TYPES = ["application/json"]
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Hash]
+      def parse(data)
+        ::JSON.parse(data.read)
+      end
+    end
+  end
+end
data/lib/langchain/processors/jsonl.rb ADDED
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class JSONL < Base
+      EXTENSIONS = [".jsonl"]
+      CONTENT_TYPES = ["application/jsonl", "application/json-lines", "application/jsonlines"]
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array of Hash]
+      def parse(data)
+        data.read.lines.map do |line|
+          ::JSON.parse(line)
+        end
+      end
+    end
+  end
+end
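The JSON and JSONL processors follow the same `parse(data)` contract, differing only in return shape; a sketch with illustrative filenames:

    File.open("record.json") { |f| Langchain::Processors::JSON.new.parse(f) }    # => Hash
    File.open("records.jsonl") { |f| Langchain::Processors::JSONL.new.parse(f) } # => Array of Hash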
data/lib/langchain.rb CHANGED
@@ -17,13 +17,17 @@ module Langchain
   @root = Pathname.new(__dir__)
 
   autoload :Loader, "langchain/loader"
+  autoload :Data, "langchain/data"
 
   module Processors
     autoload :Base, "langchain/processors/base"
-    autoload :PDF, "langchain/processors/pdf"
+    autoload :CSV, "langchain/processors/csv"
+    autoload :Docx, "langchain/processors/docx"
     autoload :HTML, "langchain/processors/html"
+    autoload :JSON, "langchain/processors/json"
+    autoload :JSONL, "langchain/processors/jsonl"
+    autoload :PDF, "langchain/processors/pdf"
     autoload :Text, "langchain/processors/text"
-    autoload :Docx, "langchain/processors/docx"
   end
 end
 
@@ -37,6 +41,7 @@ module Vectorsearch
   autoload :Chroma, "vectorsearch/chroma"
   autoload :Milvus, "vectorsearch/milvus"
   autoload :Pinecone, "vectorsearch/pinecone"
+  autoload :Pgvector, "vectorsearch/pgvector"
   autoload :Qdrant, "vectorsearch/qdrant"
   autoload :Weaviate, "vectorsearch/weaviate"
 end
data/lib/llm/base.rb CHANGED
@@ -33,6 +33,11 @@ module LLM
       raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
     end
 
+    # Method supported by an LLM that summarizes a given text
+    def summarize(...)
+      raise NotImplementedError, "#{self.class.name} does not support summarization"
+    end
+
     # Ensure that the LLM value passed in is supported
     # @param llm [Symbol] The LLM to use
     def self.validate_llm!(llm:)
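This stub makes `summarize` part of the shared LLM interface: subclasses that do not override it (HuggingFace, in this release) raise a descriptive error instead of a NoMethodError. A sketch, with the environment variable name assumed for illustration:

    llm = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
    llm.summarize(text: "...") # => raises NotImplementedError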
data/lib/llm/cohere.rb CHANGED
@@ -16,9 +16,12 @@ module LLM
       @client = ::Cohere::Client.new(api_key: api_key)
     end
 
+    #
     # Generate an embedding for a given text
+    #
     # @param text [String] The text to generate an embedding for
     # @return [Hash] The embedding
+    #
     def embed(text:)
       response = client.embed(
         texts: [text],
@@ -27,9 +30,12 @@ module LLM
       response.dig("embeddings").first
     end
 
+    #
     # Generate a completion for a given prompt
+    #
     # @param prompt [String] The prompt to generate a completion for
     # @return [Hash] The completion
+    #
     def complete(prompt:, **params)
       default_params = {
         prompt: prompt,
@@ -51,5 +57,16 @@ module LLM
     def chat(...)
       complete(...)
     end
+
+    # Generate a summary in English for a given text
+    #
+    # More parameters available to extend this method with: https://github.com/andreibondarev/cohere-ruby/blob/0.9.4/lib/cohere/client.rb#L107-L115
+    #
+    # @param text [String] The text to generate a summary for
+    # @return [String] The summary
+    def summarize(text:)
+      response = client.summarize(text: text)
+      response.dig("summary")
+    end
   end
 end
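Cohere is the one LLM in this release that delegates to its API's native summarize endpoint rather than the bundled prompt template. A usage sketch, assuming the constructor keyword and environment variable shown:

    cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
    cohere.summarize(text: long_article_text) # long_article_text is illustrative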
data/lib/llm/google_palm.rb CHANGED
@@ -81,5 +81,25 @@
       response = client.generate_chat_message(**default_params)
       response.dig("candidates", 0, "content")
     end
+
+    #
+    # Generate a summarization for a given text
+    #
+    # @param text [String] The text to generate a summarization for
+    # @return [String] The summarization
+    #
+    def summarize(text:)
+      prompt_template = Prompt.load_from_path(
+        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      )
+      prompt = prompt_template.format(text: text)
+
+      complete(
+        prompt: prompt,
+        temperature: DEFAULTS[:temperature],
+        # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+        max_tokens: 2048
+      )
+    end
   end
 end
data/lib/llm/hugging_face.rb CHANGED
@@ -12,6 +12,7 @@ module LLM
 
     #
     # Intialize the HuggingFace LLM
+    #
     # @param api_key [String] The API key to use
     #
     def initialize(api_key:)
@@ -21,9 +22,12 @@ module LLM
       @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
     end
 
+    #
     # Generate an embedding for a given text
+    #
     # @param text [String] The text to embed
     # @return [Array] The embedding
+    #
     def embed(text:)
       client.embedding(
         input: text,
data/lib/llm/openai.rb CHANGED
@@ -18,9 +18,12 @@ module LLM
       @client = ::OpenAI::Client.new(access_token: api_key)
     end
 
+    #
     # Generate an embedding for a given text
+    #
     # @param text [String] The text to generate an embedding for
     # @return [Array] The embedding
+    #
     def embed(text:)
       response = client.embeddings(
         parameters: {
@@ -31,9 +34,12 @@ module LLM
       response.dig("data").first.dig("embedding")
     end
 
+    #
     # Generate a completion for a given prompt
+    #
     # @param prompt [String] The prompt to generate a completion for
     # @return [String] The completion
+    #
     def complete(prompt:, **params)
       default_params = {
         model: DEFAULTS[:completion_model_name],
@@ -51,9 +57,12 @@ module LLM
       response.dig("choices", 0, "text")
     end
 
+    #
     # Generate a chat completion for a given prompt
+    #
     # @param prompt [String] The prompt to generate a chat completion for
     # @return [String] The chat completion
+    #
     def chat(prompt:, **params)
       default_params = {
         model: DEFAULTS[:chat_completion_model_name],
@@ -71,5 +80,25 @@ module LLM
       response = client.chat(parameters: default_params)
       response.dig("choices", 0, "message", "content")
     end
+
+    #
+    # Generate a summary for a given text
+    #
+    # @param text [String] The text to generate a summary for
+    # @return [String] The summary
+    #
+    def summarize(text:)
+      prompt_template = Prompt.load_from_path(
+        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      )
+      prompt = prompt_template.format(text: text)
+
+      complete(
+        prompt: prompt,
+        temperature: DEFAULTS[:temperature],
+        # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+        max_tokens: 2048
+      )
+    end
   end
 end
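OpenAI, Google PaLM, and Replicate all share this template-driven implementation: load the bundled prompt, interpolate the text, and run a plain completion capped at 2048 tokens. A usage sketch, assuming the constructor keyword and environment variable shown:

    openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
    openai.summarize(text: File.read("article.txt")) # filename illustrative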
data/lib/llm/prompts/summarize_template.json ADDED
@@ -0,0 +1,5 @@
+{
+  "_type": "prompt",
+  "input_variables": ["text"],
+  "template": "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY:"
+}
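This is the template the summarize implementations above load via `Prompt.load_from_path`; formatting it directly shows the exact prompt sent to the model:

    template = Prompt.load_from_path(
      file_path: Langchain.root.join("llm/prompts/summarize_template.json")
    )
    template.format(text: "Some long passage...")
    # => "Write a concise summary of the following:\n\nSome long passage...\n\nCONCISE SUMMARY:"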
data/lib/llm/replicate.rb CHANGED
@@ -23,8 +23,11 @@ module LLM
       dimension: 384
     }.freeze
 
+    #
     # Intialize the Replicate LLM
+    #
     # @param api_key [String] The API key to use
+    #
     def initialize(api_key:)
       depends_on "replicate-ruby"
       require "replicate"
@@ -36,9 +39,12 @@ module LLM
       @client = ::Replicate.client
     end
 
+    #
     # Generate an embedding for a given text
+    #
     # @param text [String] The text to generate an embedding for
     # @return [Hash] The embedding
+    #
     def embed(text:)
       response = embeddings_model.predict(input: text)
 
@@ -50,9 +56,12 @@ module LLM
       response.output
     end
 
+    #
     # Generate a completion for a given prompt
+    #
     # @param prompt [String] The prompt to generate a completion for
     # @return [Hash] The completion
+    #
     def complete(prompt:, **params)
       response = completion_model.predict(prompt: prompt)
 
@@ -73,6 +82,26 @@ module LLM
       complete(...)
     end
 
+    #
+    # Generate a summary for a given text
+    #
+    # @param text [String] The text to generate a summary for
+    # @return [String] The summary
+    #
+    def summarize(text:)
+      prompt_template = Prompt.load_from_path(
+        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      )
+      prompt = prompt_template.format(text: text)
+
+      complete(
+        prompt: prompt,
+        temperature: DEFAULTS[:temperature],
+        # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+        max_tokens: 2048
+      )
+    end
+
     alias_method :generate_embedding, :embed
 
     private
data/lib/vectorsearch/base.rb CHANGED
@@ -74,7 +74,7 @@ module Vectorsearch
 
       texts = Array(path || paths)
         .flatten
-        .map { |path| Langchain::Loader.new(path)&.load }
+        .map { |path| Langchain::Loader.new(path)&.load&.value }
         .compact
 
       add_texts(texts: texts)
data/lib/vectorsearch/pgvector.rb ADDED
@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+module Vectorsearch
+  # The PostgreSQL vector search adapter
+  class Pgvector < Base
+    # @param url [String] The URL of the PostgreSQL database
+    # @param index_name [String] The name of the table to use for the index
+    # @param llm [String] The URL of the Language Layer API
+    # @param llm_api_key [String] The API key for the Language Layer API
+    # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
+    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+      require "pg"
+      require "pgvector"
+
+      @client = ::PG.connect(url)
+      registry = ::PG::BasicTypeRegistry.new.define_default_types
+      ::Pgvector::PG.register_vector(registry)
+      @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
+
+      @index_name = index_name
+
+      super(llm: llm, llm_api_key: llm_api_key)
+    end
+
+    # Add a list of texts to the index
+    # @param texts [Array<String>] The texts to add to the index
+    # @return [PG::Result] The response from the database
+    def add_texts(texts:)
+      data = texts.flat_map do |text|
+        [text, llm_client.embed(text: text)]
+      end
+      values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+      client.exec_params(
+        "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
+        data
+      )
+    end
+
+    # Create default schema
+    # @return [PG::Result] The response from the database
+    def create_default_schema
+      client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
+      client.exec(
+        <<~SQL
+          CREATE TABLE IF NOT EXISTS #{@index_name} (
+            id serial PRIMARY KEY,
+            content TEXT,
+            vectors VECTOR(#{default_dimension})
+          );
+        SQL
+      )
+    end
+
+    # Search for similar texts in the index
+    # @param query [String] The text to search for
+    # @param k [Integer] The number of top results to return
+    # @return [Array<Hash>] The results of the search
+    def similarity_search(query:, k: 4)
+      embedding = llm_client.embed(text: query)
+
+      similarity_search_by_vector(
+        embedding: embedding,
+        k: k
+      )
+    end
+
+    # Search for similar texts in the index by the passed in vector.
+    # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
+    # @param embedding [Array<Float>] The vector to search for
+    # @param k [Integer] The number of top results to return
+    # @return [Array<Hash>] The results of the search
+    def similarity_search_by_vector(embedding:, k: 4)
+      result = client.transaction do |conn|
+        conn.exec("SET LOCAL ivfflat.probes = 10;")
+        query = <<~SQL
+          SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
+        SQL
+        conn.exec_params(query, [embedding, k])
+      end
+
+      result.to_a
+    end
+
+    # Ask a question and return the answer
+    # @param question [String] The question to ask
+    # @return [String] The answer to the question
+    def ask(question:)
+      search_results = similarity_search(query: question)
+
+      context = search_results.map do |result|
+        result["content"].to_s
+      end
+      context = context.join("\n---\n")
+
+      prompt = generate_prompt(question: question, context: context)
+
+      llm_client.chat(prompt: prompt)
+    end
+  end
+end
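End to end, the new adapter is driven like the other Vectorsearch backends; a minimal sketch, assuming a reachable Postgres that permits creating the vector extension (the table name and environment variables are illustrative):

    db = Vectorsearch::Pgvector.new(
      url: ENV["POSTGRES_URL"],
      index_name: "documents",
      llm: :openai,
      llm_api_key: ENV["OPENAI_API_KEY"]
    )
    db.create_default_schema
    db.add_texts(texts: ["pgvector stores embeddings inside Postgres."])
    db.ask(question: "Where does pgvector store embeddings?")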
data/lib/vectorsearch/weaviate.rb CHANGED
@@ -14,9 +14,7 @@ module Vectorsearch
 
     @client = ::Weaviate::Client.new(
       url: url,
-      api_key: api_key,
-      model_service: llm,
-      model_service_api_key: llm_api_key
+      api_key: api_key
     )
     @index_name = index_name
 
@@ -30,7 +28,8 @@ module Vectorsearch
     objects = Array(texts).map do |text|
       {
         class: index_name,
-        properties: {content: text}
+        properties: {content: text},
+        vector: llm_client.embed(text: text)
       }
     end
 
@@ -43,11 +42,7 @@ module Vectorsearch
   def create_default_schema
     client.schema.create(
       class_name: index_name,
-      vectorizer: "text2vec-#{llm}",
-      # TODO: Figure out a way to optionally enable it
-      # "module_config": {
-      #   "qna-openai": {}
-      # },
+      vectorizer: "none",
       properties: [
         # TODO: Allow passing in your own IDs
         {
@@ -63,14 +58,9 @@ module Vectorsearch
   # @param k [Integer|String] The number of results to return
   # @return [Hash] The search results
   def similarity_search(query:, k: 4)
-    near_text = "{ concepts: [\"#{query}\"] }"
+    embedding = llm_client.embed(text: query)
 
-    client.query.get(
-      class_name: index_name,
-      near_text: near_text,
-      limit: k.to_s,
-      fields: "content _additional { id }"
-    )
+    similarity_search_by_vector(embedding: embedding, k: k)
   end
 
   # Return documents similar to the vector
@@ -92,29 +82,16 @@ module Vectorsearch
   # @param question [String] The question to ask
   # @return [Hash] The answer
   def ask(question:)
-    # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with `qna-openai` module enabled).
-    # The Cohere support is on the way: https://github.com/weaviate/weaviate/pull/2600
-    if llm == :openai
-      ask_object = "{ question: \"#{question}\" }"
-
-      client.query.get(
-        class_name: index_name,
-        ask: ask_object,
-        limit: "1",
-        fields: "_additional { answer { result } }"
-      )
-    elsif llm == :cohere
-      search_results = similarity_search(query: question)
+    search_results = similarity_search(query: question)
 
-      context = search_results.map do |result|
-        result.dig("content").to_s
-      end
-      context = context.join("\n---\n")
+    context = search_results.map do |result|
+      result.dig("content").to_s
+    end
+    context = context.join("\n---\n")
 
-      prompt = generate_prompt(question: question, context: context)
+    prompt = generate_prompt(question: question, context: context)
 
-      llm_client.chat(prompt: prompt)
-    end
+    llm_client.chat(prompt: prompt)
   end
 end
 end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.3.12"
+  VERSION = "0.3.14"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.3.12
+  version: 0.3.14
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-25 00:00:00.000000000 Z
+date: 2023-05-28 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
@@ -164,6 +164,34 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.13'
+- !ruby/object:Gem::Dependency
+  name: pg
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '1.5'
+- !ruby/object:Gem::Dependency
+  name: pgvector
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '0.2'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "<"
+      - !ruby/object:Gem::Version
+        version: '0.2'
 - !ruby/object:Gem::Dependency
   name: pdf-reader
   requirement: !ruby/object:Gem::Requirement
@@ -288,10 +316,14 @@ files:
 - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
 - lib/dependency_helper.rb
 - lib/langchain.rb
+- lib/langchain/data.rb
 - lib/langchain/loader.rb
 - lib/langchain/processors/base.rb
+- lib/langchain/processors/csv.rb
 - lib/langchain/processors/docx.rb
 - lib/langchain/processors/html.rb
+- lib/langchain/processors/json.rb
+- lib/langchain/processors/jsonl.rb
 - lib/langchain/processors/pdf.rb
 - lib/langchain/processors/text.rb
 - lib/langchainrb.rb
@@ -300,6 +332,7 @@ files:
 - lib/llm/google_palm.rb
 - lib/llm/hugging_face.rb
 - lib/llm/openai.rb
+- lib/llm/prompts/summarize_template.json
 - lib/llm/replicate.rb
 - lib/prompt/base.rb
 - lib/prompt/few_shot_prompt_template.rb
@@ -312,6 +345,7 @@ files:
 - lib/vectorsearch/base.rb
 - lib/vectorsearch/chroma.rb
 - lib/vectorsearch/milvus.rb
+- lib/vectorsearch/pgvector.rb
 - lib/vectorsearch/pinecone.rb
 - lib/vectorsearch/qdrant.rb
 - lib/vectorsearch/weaviate.rb