langchainrb 0.3.12 → 0.3.14

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 974f0a2b8ce3fe42144016bd740ee9d4f7e597834319cc92fbf1d50bd1f4468e
- data.tar.gz: 3686a42c37eb117e6d7485ef4f7777c0f12968bb9cdcc3a30c7721c86c0a4325
+ metadata.gz: 33c9436ac8d6a73dc06d30f63c11e4f246b3705aa8934765a53ee59325c3a9cd
+ data.tar.gz: 9cc85603694f9367dd162e25379029a345aa0b5c88cccf303c2af114d43a4010
  SHA512:
- metadata.gz: a61f9b36d9d19eb6cf87af18c7fb40f55d39771257d08a6af2ec3384988419dfb158ffa8fc81c3769c0149f1ffa8b03200366bbea55b03b0d1553912af8d9ae6
- data.tar.gz: 7dc53be923fe5b8587f61617198b24c42e8793fbd8e18c42a17035bf68279c59c37c6c691cabe13c83adc5dc2cff66ea293f198297ab9a9de30aa68ca72bd9c4
+ metadata.gz: ca5e81638625939d11999a64d44c92fc57c762a934aa8fd5b110c3f5aacc9a736ab5f02da4366e7a1b9b9ec0335dd1eb1683f5b9d90bd97c81914ea0a698dc7c
+ data.tar.gz: aff49ef9451bcbc9a97d181757a5b913737cbfbb4fc3ca49d423cbd2e59a4a71091816e98c2996ff7f8292cb6e0c0d69931a4f3e4e36e2a69fdd7f745640e266
data/.env.example CHANGED
@@ -11,4 +11,5 @@ QDRANT_API_KEY=
  QDRANT_URL=
  SERPAPI_API_KEY=
  WEAVIATE_API_KEY=
- WEAVIATE_URL=
+ WEAVIATE_URL=
+ POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
  ## [Unreleased]
 
+ ## [0.3.14] - 2023-05-28
+ - 🔍 Vectorsearch
+   - Not relying on Weaviate modules anymore
+   - Adding missing specs for Qdrant and Milvus classes
+ - 🚚 Loaders
+   - Add Langchain::Data result object for data loaders
+ - 🗣️ LLMs
+   - Add `summarize()` method to the LLMs
+
+ ## [0.3.13] - 2023-05-26
+ - 🔍 Vectorsearch
+   - Pgvector support
+ - 🚚 Loaders
+   - CSV loader
+   - JSON loader
+   - JSONL loader
+
  ## [0.3.12] - 2023-05-25
  - 🔍 Vectorsearch
    - Introduce namespace support for Pinecone
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     langchainrb (0.3.12)
+     langchainrb (0.3.14)
 
  GEM
    remote: https://rubygems.org/
@@ -148,9 +148,13 @@ GEM
      milvus (0.9.1)
        faraday (~> 1)
      mini_mime (1.1.2)
+     mini_portile2 (2.8.2)
      minitest (5.18.0)
      multi_xml (0.6.0)
      multipart-post (2.3.0)
+     nokogiri (1.14.3)
+       mini_portile2 (~> 2.8.0)
+       racc (~> 1.4)
      nokogiri (1.14.3-arm64-darwin)
        racc (~> 1.4)
      nokogiri (1.14.3-x86_64-darwin)
@@ -166,6 +170,8 @@ GEM
        hashery (~> 2.0)
        ruby-rc4
        ttfunk
+     pg (1.5.3)
+     pgvector (0.1.1)
      pinecone (0.1.71)
        dry-struct (~> 1.6.0)
        dry-validation (~> 1.10.0)
@@ -273,6 +279,7 @@ GEM
  PLATFORMS
    arm64-darwin-21
    arm64-darwin-22
+   ruby
    x86_64-darwin-19
    x86_64-darwin-22
    x86_64-linux
@@ -290,6 +297,8 @@ DEPENDENCIES
    milvus (~> 0.9.0)
    nokogiri (~> 1.13)
    pdf-reader (~> 1.4)
+   pg (~> 1.5)
+   pgvector (< 0.2)
    pinecone (~> 0.1.6)
    pry-byebug (~> 3.10.0)
    qdrant-ruby (~> 0.9.0)
data/README.md CHANGED
@@ -284,12 +284,16 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
 
  ##### Supported Formats
 
- | Format | Pocessor | Gem Requirements |
- | ------ | ---------------- | :--------------------------: |
- | docx | Processors::Docx | `gem "docx", "~> 0.8.0"` |
- | html | Processors::HTML | `gem "nokogiri", "~> 1.13"` |
- | pdf | Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
- | text | Processors::Text | |
+
+ | Format | Processor | Gem Requirements |
+ | ------ | ---------------------------- | :--------------------------: |
+ | docx | Langchain::Processors::Docx | `gem "docx", "~> 0.8.0"` |
+ | html | Langchain::Processors::HTML | `gem "nokogiri", "~> 1.13"` |
+ | pdf | Langchain::Processors::PDF | `gem "pdf-reader", "~> 1.4"` |
+ | text | Langchain::Processors::Text | |
+ | JSON | Langchain::Processors::JSON | |
+ | JSONL | Langchain::Processors::JSONL | |
+ | csv | Langchain::Processors::CSV | |
 
  ## Examples
  Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -317,6 +321,7 @@ Langchain.logger.level = :info
  [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
  [<img style="border-radius:50%" alt="Rafael Figueiredo" src="https://avatars.githubusercontent.com/u/35845775?v=4" width="80" height="80" class="avatar">](https://github.com/rafaelqfigueiredo)
  [<img style="border-radius:50%" alt="Ricky Chilcott" src="https://avatars.githubusercontent.com/u/445759?v=4" width="80" height="80" class="avatar">](https://github.com/rickychilcott)
+ [<img style="border-radius:50%" alt="Alex Chaplinsky" src="https://avatars.githubusercontent.com/u/695947?v=4" width="80" height="80" class="avatar">](https://github.com/alchaplinsky)
 
  (Criteria for becoming an Honorary Contributor or Core Contributor is pending...)
 
data/lib/langchain/data.rb ADDED
@@ -0,0 +1,16 @@
+ # frozen_string_literal: true
+
+ module Langchain
+   class Data
+     attr_reader :source
+
+     def initialize(data, options = {})
+       @source = options[:source]
+       @data = data
+     end
+
+     def value
+       @data
+     end
+   end
+ end
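
Loaders now return this `Langchain::Data` wrapper instead of the raw parsed value, so callers can recover both the content and where it came from. A minimal sketch of the new return value (the file path is hypothetical):

```ruby
# Assumes a local ./example.pdf and the pdf-reader gem; any supported format works.
data = Langchain::Loader.new("./example.pdf").load

data.source #=> "./example.pdf"
data.value  #=> the parsed text produced by the matching processor
```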
data/lib/langchain/loader.rb CHANGED
@@ -58,11 +58,12 @@ module Langchain
    end
 
    def process(&block)
-     data, processor = yield
+     raw_data, kind = yield
 
-     raise UnknownFormatError unless processor
+     raise UnknownFormatError unless kind
 
-     Langchain::Processors.const_get(processor).new.parse(data)
+     processor = Langchain::Processors.const_get(kind).new
+     Langchain::Data.new(processor.parse(raw_data), source: @path)
    end
 
    def find_processor(constant, value)
data/lib/langchain/processors/csv.rb ADDED
@@ -0,0 +1,21 @@
+ # frozen_string_literal: true
+
+ require "csv"
+
+ module Langchain
+   module Processors
+     class CSV < Base
+       EXTENSIONS = [".csv"]
+       CONTENT_TYPES = ["text/csv"]
+
+       # Parse the document and return the rows
+       # @param [File] data
+       # @return [Array<Array<String>>]
+       def parse(data)
+         ::CSV.new(data.read).map do |row|
+           row.map(&:strip)
+         end
+       end
+     end
+   end
+ end
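
With the processor registered (see the `lib/langchain.rb` autoload changes below), the loader picks up `.csv` files by extension. A minimal sketch, assuming a hypothetical `./people.csv`:

```ruby
# ./people.csv contains:
#   name,age
#   Alice, 30
rows = Langchain::Loader.new("./people.csv").load.value
rows #=> [["name", "age"], ["Alice", "30"]]; each cell is whitespace-stripped
```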
data/lib/langchain/processors/json.rb ADDED
@@ -0,0 +1,17 @@
+ # frozen_string_literal: true
+
+ module Langchain
+   module Processors
+     class JSON < Base
+       EXTENSIONS = [".json"]
+       CONTENT_TYPES = ["application/json"]
+
+       # Parse the document and return the parsed JSON
+       # @param [File] data
+       # @return [Hash]
+       def parse(data)
+         ::JSON.parse(data.read)
+       end
+     end
+   end
+ end
data/lib/langchain/processors/jsonl.rb ADDED
@@ -0,0 +1,19 @@
+ # frozen_string_literal: true
+
+ module Langchain
+   module Processors
+     class JSONL < Base
+       EXTENSIONS = [".jsonl"]
+       CONTENT_TYPES = ["application/jsonl", "application/json-lines", "application/jsonlines"]
+
+       # Parse the document, one JSON object per line
+       # @param [File] data
+       # @return [Array<Hash>]
+       def parse(data)
+         data.read.lines.map do |line|
+           ::JSON.parse(line)
+         end
+       end
+     end
+   end
+ end
data/lib/langchain.rb CHANGED
@@ -17,13 +17,17 @@ module Langchain
    @root = Pathname.new(__dir__)
 
    autoload :Loader, "langchain/loader"
+   autoload :Data, "langchain/data"
 
    module Processors
      autoload :Base, "langchain/processors/base"
-     autoload :PDF, "langchain/processors/pdf"
+     autoload :CSV, "langchain/processors/csv"
+     autoload :Docx, "langchain/processors/docx"
      autoload :HTML, "langchain/processors/html"
+     autoload :JSON, "langchain/processors/json"
+     autoload :JSONL, "langchain/processors/jsonl"
+     autoload :PDF, "langchain/processors/pdf"
      autoload :Text, "langchain/processors/text"
-     autoload :Docx, "langchain/processors/docx"
    end
  end
 
@@ -37,6 +41,7 @@ module Vectorsearch
    autoload :Chroma, "vectorsearch/chroma"
    autoload :Milvus, "vectorsearch/milvus"
    autoload :Pinecone, "vectorsearch/pinecone"
+   autoload :Pgvector, "vectorsearch/pgvector"
    autoload :Qdrant, "vectorsearch/qdrant"
    autoload :Weaviate, "vectorsearch/weaviate"
  end
data/lib/llm/base.rb CHANGED
@@ -33,6 +33,11 @@ module LLM
      raise NotImplementedError, "#{self.class.name} does not support generating embeddings"
    end
 
+   # Method supported by an LLM that summarizes a given text
+   def summarize(...)
+     raise NotImplementedError, "#{self.class.name} does not support summarization"
+   end
+
    # Ensure that the LLM value passed in is supported
    # @param llm [Symbol] The LLM to use
    def self.validate_llm!(llm:)
data/lib/llm/cohere.rb CHANGED
@@ -16,9 +16,12 @@ module LLM
      @client = ::Cohere::Client.new(api_key: api_key)
    end
 
+   #
    # Generate an embedding for a given text
+   #
    # @param text [String] The text to generate an embedding for
    # @return [Hash] The embedding
+   #
    def embed(text:)
      response = client.embed(
        texts: [text],
@@ -27,9 +30,12 @@ module LLM
      response.dig("embeddings").first
    end
 
+   #
    # Generate a completion for a given prompt
+   #
    # @param prompt [String] The prompt to generate a completion for
    # @return [Hash] The completion
+   #
    def complete(prompt:, **params)
      default_params = {
        prompt: prompt,
@@ -51,5 +57,16 @@ module LLM
    def chat(...)
      complete(...)
    end
+
+   # Generate a summary in English for a given text
+   #
+   # More parameters available to extend this method with: https://github.com/andreibondarev/cohere-ruby/blob/0.9.4/lib/cohere/client.rb#L107-L115
+   #
+   # @param text [String] The text to generate a summary for
+   # @return [String] The summary
+   def summarize(text:)
+     response = client.summarize(text: text)
+     response.dig("summary")
+   end
  end
end
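
Cohere is the one LLM here whose `summarize` is backed by a native endpoint rather than the shared prompt template. A minimal usage sketch, assuming a valid key in `ENV["COHERE_API_KEY"]` and the cohere-ruby gem (`article` is a hypothetical string variable):

```ruby
cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
summary = cohere.summarize(text: article) #=> an English summary String
```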
data/lib/llm/google_palm.rb CHANGED
@@ -81,5 +81,25 @@ module LLM
      response = client.generate_chat_message(**default_params)
      response.dig("candidates", 0, "content")
    end
+
+   #
+   # Generate a summarization for a given text
+   #
+   # @param text [String] The text to generate a summarization for
+   # @return [String] The summarization
+   #
+   def summarize(text:)
+     prompt_template = Prompt.load_from_path(
+       file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+     )
+     prompt = prompt_template.format(text: text)
+
+     complete(
+       prompt: prompt,
+       temperature: DEFAULTS[:temperature],
+       # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+       max_tokens: 2048
+     )
+   end
  end
end
data/lib/llm/hugging_face.rb CHANGED
@@ -12,6 +12,7 @@ module LLM
 
    #
    # Initialize the HuggingFace LLM
+   #
    # @param api_key [String] The API key to use
    #
    def initialize(api_key:)
@@ -21,9 +22,12 @@ module LLM
      @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
    end
 
+   #
    # Generate an embedding for a given text
+   #
    # @param text [String] The text to embed
    # @return [Array] The embedding
+   #
    def embed(text:)
      client.embedding(
        input: text,
data/lib/llm/openai.rb CHANGED
@@ -18,9 +18,12 @@ module LLM
      @client = ::OpenAI::Client.new(access_token: api_key)
    end
 
+   #
    # Generate an embedding for a given text
+   #
    # @param text [String] The text to generate an embedding for
    # @return [Array] The embedding
+   #
    def embed(text:)
      response = client.embeddings(
        parameters: {
@@ -31,9 +34,12 @@ module LLM
      response.dig("data").first.dig("embedding")
    end
 
+   #
    # Generate a completion for a given prompt
+   #
    # @param prompt [String] The prompt to generate a completion for
    # @return [String] The completion
+   #
    def complete(prompt:, **params)
      default_params = {
        model: DEFAULTS[:completion_model_name],
@@ -51,9 +57,12 @@ module LLM
      response.dig("choices", 0, "text")
    end
 
+   #
    # Generate a chat completion for a given prompt
+   #
    # @param prompt [String] The prompt to generate a chat completion for
    # @return [String] The chat completion
+   #
    def chat(prompt:, **params)
      default_params = {
        model: DEFAULTS[:chat_completion_model_name],
@@ -71,5 +80,25 @@ module LLM
      response = client.chat(parameters: default_params)
      response.dig("choices", 0, "message", "content")
    end
+
+   #
+   # Generate a summary for a given text
+   #
+   # @param text [String] The text to generate a summary for
+   # @return [String] The summary
+   #
+   def summarize(text:)
+     prompt_template = Prompt.load_from_path(
+       file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+     )
+     prompt = prompt_template.format(text: text)
+
+     complete(
+       prompt: prompt,
+       temperature: DEFAULTS[:temperature],
+       # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+       max_tokens: 2048
+     )
+   end
  end
end
data/lib/llm/prompts/summarize_template.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "_type": "prompt",
+   "input_variables": ["text"],
+   "template": "Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY:"
+ }
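
OpenAI, Google PaLM, and Replicate all route `summarize` through this template; only Cohere uses a native endpoint. A minimal sketch of the shared flow, assuming `ENV["OPENAI_API_KEY"]` is set (the input and output strings are illustrative):

```ruby
openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# Internally: load the template, interpolate {text}, then call complete()
# with the default temperature and max_tokens: 2048.
openai.summarize(text: "Ruby is a dynamic, open source programming language...")
#=> a concise summary String
```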
data/lib/llm/replicate.rb CHANGED
@@ -23,8 +23,11 @@
      dimension: 384
    }.freeze
 
+   #
    # Initialize the Replicate LLM
+   #
    # @param api_key [String] The API key to use
+   #
    def initialize(api_key:)
      depends_on "replicate-ruby"
      require "replicate"
@@ -36,9 +39,12 @@
      @client = ::Replicate.client
    end
 
+   #
    # Generate an embedding for a given text
+   #
    # @param text [String] The text to generate an embedding for
    # @return [Hash] The embedding
+   #
    def embed(text:)
      response = embeddings_model.predict(input: text)
 
@@ -50,9 +56,12 @@
      response.output
    end
 
+   #
    # Generate a completion for a given prompt
+   #
    # @param prompt [String] The prompt to generate a completion for
    # @return [Hash] The completion
+   #
    def complete(prompt:, **params)
      response = completion_model.predict(prompt: prompt)
 
@@ -73,6 +82,26 @@
      complete(...)
    end
 
+   #
+   # Generate a summary for a given text
+   #
+   # @param text [String] The text to generate a summary for
+   # @return [String] The summary
+   #
+   def summarize(text:)
+     prompt_template = Prompt.load_from_path(
+       file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+     )
+     prompt = prompt_template.format(text: text)
+
+     complete(
+       prompt: prompt,
+       temperature: DEFAULTS[:temperature],
+       # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+       max_tokens: 2048
+     )
+   end
+
    alias_method :generate_embedding, :embed
 
    private
data/lib/vectorsearch/base.rb CHANGED
@@ -74,7 +74,7 @@ module Vectorsearch
 
    texts = Array(path || paths)
      .flatten
-     .map { |path| Langchain::Loader.new(path)&.load }
+     .map { |path| Langchain::Loader.new(path)&.load&.value }
      .compact
 
    add_texts(texts: texts)
data/lib/vectorsearch/pgvector.rb ADDED
@@ -0,0 +1,100 @@
+ # frozen_string_literal: true
+
+ module Vectorsearch
+   # The PostgreSQL vector search adapter
+   class Pgvector < Base
+     # @param url [String] The URL of the PostgreSQL database
+     # @param index_name [String] The name of the table to use for the index
+     # @param llm [Symbol] The LLM to use (e.g. :openai)
+     # @param llm_api_key [String] The API key for the LLM
+     # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
+     def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+       require "pg"
+       require "pgvector"
+
+       @client = ::PG.connect(url)
+       registry = ::PG::BasicTypeRegistry.new.define_default_types
+       ::Pgvector::PG.register_vector(registry)
+       @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
+
+       @index_name = index_name
+
+       super(llm: llm, llm_api_key: llm_api_key)
+     end
+
+     # Add a list of texts to the index
+     # @param texts [Array<String>] The texts to add to the index
+     # @return [PG::Result] The response from the database
+     def add_texts(texts:)
+       data = texts.flat_map do |text|
+         [text, llm_client.embed(text: text)]
+       end
+       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+       client.exec_params(
+         "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
+         data
+       )
+     end
+
+     # Create default schema
+     # @return [PG::Result] The response from the database
+     def create_default_schema
+       client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
+       client.exec(
+         <<~SQL
+           CREATE TABLE IF NOT EXISTS #{@index_name} (
+             id serial PRIMARY KEY,
+             content TEXT,
+             vectors VECTOR(#{default_dimension})
+           );
+         SQL
+       )
+     end
+
+     # Search for similar texts in the index
+     # @param query [String] The text to search for
+     # @param k [Integer] The number of top results to return
+     # @return [Array<Hash>] The results of the search
+     def similarity_search(query:, k: 4)
+       embedding = llm_client.embed(text: query)
+
+       similarity_search_by_vector(
+         embedding: embedding,
+         k: k
+       )
+     end
+
+     # Search for similar texts in the index by the passed in vector.
+     # You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.
+     # @param embedding [Array<Float>] The vector to search for
+     # @param k [Integer] The number of top results to return
+     # @return [Array<Hash>] The results of the search
+     def similarity_search_by_vector(embedding:, k: 4)
+       result = client.transaction do |conn|
+         conn.exec("SET LOCAL ivfflat.probes = 10;")
+         query = <<~SQL
+           SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
+         SQL
+         conn.exec_params(query, [embedding, k])
+       end
+
+       result.to_a
+     end
+
+     # Ask a question and return the answer
+     # @param question [String] The question to ask
+     # @return [String] The answer to the question
+     def ask(question:)
+       search_results = similarity_search(query: question)
+
+       context = search_results.map do |result|
+         result["content"].to_s
+       end
+       context = context.join("\n---\n")
+
+       prompt = generate_prompt(question: question, context: context)
+
+       llm_client.chat(prompt: prompt)
+     end
+   end
+ end
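
A minimal end-to-end sketch of the new adapter, assuming a reachable PostgreSQL server with the pgvector extension installable and an OpenAI key; `"documents"` is a hypothetical table name:

```ruby
store = Vectorsearch::Pgvector.new(
  url: ENV["POSTGRES_URL"],   # the new .env.example entry
  index_name: "documents",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

store.create_default_schema                       # CREATE EXTENSION + table
store.add_texts(texts: ["pgvector keeps embeddings in Postgres"])
store.similarity_search(query: "Where do embeddings live?", k: 1)
#=> [{"id" => 1, "content" => "pgvector keeps embeddings in Postgres"}]
```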
data/lib/vectorsearch/weaviate.rb CHANGED
@@ -14,9 +14,7 @@ module Vectorsearch
 
    @client = ::Weaviate::Client.new(
      url: url,
-     api_key: api_key,
-     model_service: llm,
-     model_service_api_key: llm_api_key
+     api_key: api_key
    )
    @index_name = index_name
 
@@ -30,7 +28,8 @@
    objects = Array(texts).map do |text|
      {
        class: index_name,
-       properties: {content: text}
+       properties: {content: text},
+       vector: llm_client.embed(text: text)
      }
    end
 
@@ -43,11 +42,7 @@
    def create_default_schema
      client.schema.create(
        class_name: index_name,
-       vectorizer: "text2vec-#{llm}",
-       # TODO: Figure out a way to optionally enable it
-       # "module_config": {
-       #   "qna-openai": {}
-       # },
+       vectorizer: "none",
        properties: [
          # TODO: Allow passing in your own IDs
          {
@@ -63,14 +58,9 @@
    # @param k [Integer|String] The number of results to return
    # @return [Hash] The search results
    def similarity_search(query:, k: 4)
-     near_text = "{ concepts: [\"#{query}\"] }"
+     embedding = llm_client.embed(text: query)
 
-     client.query.get(
-       class_name: index_name,
-       near_text: near_text,
-       limit: k.to_s,
-       fields: "content _additional { id }"
-     )
+     similarity_search_by_vector(embedding: embedding, k: k)
    end
 
    # Return documents similar to the vector
@@ -92,29 +82,16 @@
    # @param question [String] The question to ask
    # @return [Hash] The answer
    def ask(question:)
-     # Weaviate currently supports the `ask:` parameter only for the OpenAI LLM (with `qna-openai` module enabled).
-     # The Cohere support is on the way: https://github.com/weaviate/weaviate/pull/2600
-     if llm == :openai
-       ask_object = "{ question: \"#{question}\" }"
-
-       client.query.get(
-         class_name: index_name,
-         ask: ask_object,
-         limit: "1",
-         fields: "_additional { answer { result } }"
-       )
-     elsif llm == :cohere
-       search_results = similarity_search(query: question)
+     search_results = similarity_search(query: question)
 
-       context = search_results.map do |result|
-         result.dig("content").to_s
-       end
-       context = context.join("\n---\n")
+     context = search_results.map do |result|
+       result.dig("content").to_s
+     end
+     context = context.join("\n---\n")
 
-       prompt = generate_prompt(question: question, context: context)
+     prompt = generate_prompt(question: question, context: context)
 
-       llm_client.chat(prompt: prompt)
-     end
+     llm_client.chat(prompt: prompt)
    end
  end
end
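
With the Weaviate vectorizer modules gone, embeddings are now generated client-side by the configured LLM and written alongside each object, and `ask` works the same way for every LLM. A rough sketch, assuming the constructor keeps the same keyword arguments as the other vectorsearch adapters (`"Documents"` is a hypothetical class name):

```ruby
store = Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Documents",
  llm: :openai,
  llm_api_key: ENV["OPENAI_API_KEY"]
)

store.create_default_schema  # vectorizer is now "none"; no qna-openai module needed
store.add_texts(texts: ["vectors are computed by the LLM client"])
store.ask(question: "Who computes the vectors?")
```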
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Langchain
-   VERSION = "0.3.12"
+   VERSION = "0.3.14"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
-   version: 0.3.12
+   version: 0.3.14
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-05-25 00:00:00.000000000 Z
+ date: 2023-05-28 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: dotenv-rails
@@ -164,6 +164,34 @@ dependencies:
      - - "~>"
        - !ruby/object:Gem::Version
          version: '1.13'
+ - !ruby/object:Gem::Dependency
+   name: pg
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.5'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "~>"
+       - !ruby/object:Gem::Version
+         version: '1.5'
+ - !ruby/object:Gem::Dependency
+   name: pgvector
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '0.2'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - "<"
+       - !ruby/object:Gem::Version
+         version: '0.2'
  - !ruby/object:Gem::Dependency
    name: pdf-reader
    requirement: !ruby/object:Gem::Requirement
@@ -288,10 +316,14 @@ files:
  - lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
  - lib/dependency_helper.rb
  - lib/langchain.rb
+ - lib/langchain/data.rb
  - lib/langchain/loader.rb
  - lib/langchain/processors/base.rb
+ - lib/langchain/processors/csv.rb
  - lib/langchain/processors/docx.rb
  - lib/langchain/processors/html.rb
+ - lib/langchain/processors/json.rb
+ - lib/langchain/processors/jsonl.rb
  - lib/langchain/processors/pdf.rb
  - lib/langchain/processors/text.rb
  - lib/langchainrb.rb
@@ -300,6 +332,7 @@ files:
  - lib/llm/google_palm.rb
  - lib/llm/hugging_face.rb
  - lib/llm/openai.rb
+ - lib/llm/prompts/summarize_template.json
  - lib/llm/replicate.rb
  - lib/prompt/base.rb
  - lib/prompt/few_shot_prompt_template.rb
@@ -312,6 +345,7 @@ files:
  - lib/vectorsearch/base.rb
  - lib/vectorsearch/chroma.rb
  - lib/vectorsearch/milvus.rb
+ - lib/vectorsearch/pgvector.rb
  - lib/vectorsearch/pinecone.rb
  - lib/vectorsearch/qdrant.rb
  - lib/vectorsearch/weaviate.rb