langchainrb 0.8.0 → 0.8.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 8ea2adff257b4151b8acf24a02de851df2d99fe8890d6afd06bcdc3a5f53e9e1
- data.tar.gz: 646a5f9246bffc20654672393f9175c1f0f30533ba1546cef05ce951d449c9ec
+ metadata.gz: 5dd13c5aae47af13fe248636ed88bd40d0e241291ab5c3dc2d5925dcc742af37
+ data.tar.gz: b190f73403a77b4ea4d1f9869423546d584df32785ae342a01d9a72ee5fe04fd
  SHA512:
- metadata.gz: 3b2aaace63c46b7eec9d8cc04a2cd9cc84c79c90a5a1f1ce1bcb11e4416021f89293d40309ca35b0e4dbb2036a2962bde0faa28ad46d081846dcb00a9a1bf783
- data.tar.gz: fd5e8e03053ab99a737b3ce17c12ae76da2bc1d0b4bda89eb16e16afe43f260325af78a7c62faf0041c8869cbd94c0a5bbbda920bb7e1d7f175ac35545b53f00
+ metadata.gz: 81dd80f49173e3d711a713b6dd365addf04129cb0f6c015d6909200a709780e30c39888f0bccba72035e03c17a0b01a4d1456e6431473149d9969907435f18c1
+ data.tar.gz: 748f841cf01b802e81bc6f6ecf8aaea5ab13593363afadc7c9634446c169812064dd41af3e58e87068a224972be85f00b1e3c2669a99e1406819507c86b1a15c
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
  ## [Unreleased]

+ ## [0.8.1]
+ - Support for Epsilla vector DB
+ - Fully functioning Google Vertex AI LLM
+ - Bug fixes
+
  ## [0.8.0]
  - [BREAKING] Updated llama_cpp.rb to 0.9.4. The model file format used by the underlying llama.cpp library has changed to GGUF. llama.cpp ships with scripts to convert existing files, and GGUF-format models can be downloaded from HuggingFace.
  - Introducing Langchain::LLM::GoogleVertexAi LLM provider
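To pick up these changes, the dependency only needs to be bumped to the new release; a minimal Gemfile sketch (the pessimistic constraint is one reasonable choice, not the only one):

```ruby
# Gemfile — track the 0.8.1 release of langchainrb
gem "langchainrb", "~> 0.8.1"
```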
data/README.md CHANGED
@@ -90,22 +90,22 @@ llm.embed(text: "foo bar")

  Generate a text completion:
  ```ruby
- llm.complete(prompt: "What is the meaning of life?")
+ llm.complete(prompt: "What is the meaning of life?").completion
  ```

  Generate a chat completion:
  ```ruby
- llm.chat(prompt: "Hey! How are you?")
+ llm.chat(prompt: "Hey! How are you?").completion
  ```

  Summarize the text:
  ```ruby
- llm.complete(text: "...")
+ llm.summarize(text: "...").completion
  ```

  You can use any other LLM by invoking the same interface:
  ```ruby
- llm = Langchain::LLM::GooglePalm.new(...)
+ llm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"], default_options: { ... })
  ```

  ### Prompt Management
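The thread running through this hunk is that LLM calls now return response objects rather than raw strings, so callers extract the text explicitly. A minimal sketch of the resulting pattern, assuming an OpenAI key in the environment and mirroring the snippets above:

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# Each call returns a response object; `.completion` extracts the generated text.
puts llm.complete(prompt: "What is the meaning of life?").completion
puts llm.chat(prompt: "Hey! How are you?").completion
puts llm.summarize(text: "Ruby is a dynamic, open source programming language ...").completion
```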
@@ -251,7 +251,7 @@ Then parse the llm response:

  ```ruby
  llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
- llm_response = llm.chat(prompt: prompt_text)
+ llm_response = llm.chat(prompt: prompt_text).completion
  parser.parse(llm_response)
  # {
  #   "name" => "Kim Ji-hyun",
@@ -310,6 +310,7 @@ Langchain.rb provides a convenient unified interface on top of supported vectors
  | Database | Open-source | Cloud offering |
  | -------- |:------------------:| :------------: |
  | [Chroma](https://trychroma.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
+ | [Epsilla](https://epsilla.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
  | [Hnswlib](https://github.com/nmslib/hnswlib/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ |
  | [Milvus](https://milvus.io/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ Zilliz Cloud |
  | [Pinecone](https://www.pinecone.io/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ |
@@ -342,6 +343,7 @@ client = Langchain::Vectorsearch::Weaviate.new(
  You can instantiate any other supported vector search database:
  ```ruby
  client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.6.0"`
+ client = Langchain::Vectorsearch::Epsilla.new(...) # `gem "epsilla-ruby", "~> 0.0.3"`
  client = Langchain::Vectorsearch::Hnswlib.new(...) # `gem "hnswlib", "~> 0.8.1"`
  client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.2"`
  client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
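For Epsilla, the `...` placeholder expands to the keyword arguments accepted by the new class (its constructor appears later in this diff); a hedged sketch, with connection details chosen purely for illustration:

```ruby
client = Langchain::Vectorsearch::Epsilla.new(
  url: "http://localhost:8888",   # protocol://host:port of your Epsilla instance (example value)
  db_name: "my_db",               # example database name
  db_path: "/tmp/my_db",          # example database path
  index_name: "Documents",        # the Epsilla table to use (example name)
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)
```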
@@ -131,7 +131,7 @@ module Langchain::LLM
  prompt: prompt,
  temperature: @defaults[:temperature],
  # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
- max_tokens: 2048
+ max_tokens: 256
  )
  end

@@ -12,22 +12,30 @@ module Langchain::LLM
  #
  class GoogleVertexAi < Base
  DEFAULTS = {
- temperature: 0.2,
+ temperature: 0.1, # 0.1 is the API default; quite low ("grounded")
+ max_output_tokens: 1000,
+ top_p: 0.8,
+ top_k: 40,
  dimension: 768,
+ completion_model_name: "text-bison", # Optional: text-bison@001
  embeddings_model_name: "textembedding-gecko"
  }.freeze

- attr_reader :project_id, :client
+ # Google Cloud has a project id and a specific deployment region.
+ # For GenAI-related things, a safe choice is us-central1.
+ attr_reader :project_id, :client, :region

  def initialize(project_id:, default_options: {})
  depends_on "google-apis-aiplatform_v1"

  @project_id = project_id
+ @region = default_options.fetch :region, "us-central1"

  @client = Google::Apis::AiplatformV1::AiplatformService.new

  # TODO: Adapt for other regions; pass it in via the constructor
- @client.root_url = "https://us-central1-aiplatform.googleapis.com/"
+ # For the moment only us-central1 is available, so this default is fine.
+ @client.root_url = "https://#{@region}-aiplatform.googleapis.com/"
  @client.authorization = Google::Auth.get_application_default

  @defaults = DEFAULTS.merge(default_options)
@@ -37,7 +45,7 @@ module Langchain::LLM
  # Generate an embedding for a given text
  #
  # @param text [String] The text to generate an embedding for
- # @return [Langchain::LLM::GooglePalmResponse] Response object
+ # @return [Langchain::LLM::GoogleVertexAiResponse] Response object
  #
  def embed(text:)
  content = [{content: text}]
@@ -45,11 +53,97 @@ module Langchain::LLM

  api_path = "projects/#{@project_id}/locations/us-central1/publishers/google/models/#{@defaults[:embeddings_model_name]}"

- puts("api_path: #{api_path}")
+ # puts("api_path: #{api_path}")

  response = client.predict_project_location_publisher_model(api_path, request)

  Langchain::LLM::GoogleVertexAiResponse.new(response.to_h, model: @defaults[:embeddings_model_name])
  end
+
+ #
+ # Generate a completion for a given prompt
+ #
+ # @param prompt [String] The prompt to generate a completion for
+ # @param params extra parameters passed to the Vertex AI predict request
+ # @return [Langchain::LLM::GoogleVertexAiResponse] Response object
+ #
+ def complete(prompt:, **params)
+ default_params = {
+ prompt: prompt,
+ temperature: @defaults[:temperature],
+ top_k: @defaults[:top_k],
+ top_p: @defaults[:top_p],
+ max_output_tokens: @defaults[:max_output_tokens],
+ model: @defaults[:completion_model_name]
+ }
+
+ if params[:stop_sequences]
+ default_params[:stop_sequences] = params.delete(:stop_sequences)
+ end
+
+ if params[:max_output_tokens]
+ default_params[:max_output_tokens] = params.delete(:max_output_tokens)
+ end
+
+ # to be tested
+ temperature = params.delete(:temperature) || @defaults[:temperature]
+ max_output_tokens = default_params.fetch(:max_output_tokens, @defaults[:max_output_tokens])
+
+ default_params.merge!(params)
+
+ # response = client.generate_text(**default_params)
+ request = Google::Apis::AiplatformV1::GoogleCloudAiplatformV1PredictRequest.new \
+ instances: [{
+ prompt: prompt # key used to be :content, changed to :prompt
+ }],
+ parameters: {
+ temperature: temperature,
+ maxOutputTokens: max_output_tokens,
+ topP: 0.8,
+ topK: 40
+ }
+
+ response = client.predict_project_location_publisher_model \
+ "projects/#{project_id}/locations/us-central1/publishers/google/models/#{@defaults[:completion_model_name]}",
+ request
+
+ Langchain::LLM::GoogleVertexAiResponse.new(response, model: default_params[:model])
+ end
+
+ #
+ # Generate a summarization for a given text
+ #
+ # @param text [String] The text to generate a summarization for
+ # @return [Langchain::LLM::GoogleVertexAiResponse] Response object
+ #
+ # TODO(ricc): add params for Temp, topP, topK, MaxTokens and have it default to these 4 values.
+ def summarize(text:)
+ prompt_template = Langchain::Prompt.load_from_path(
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.yaml")
+ )
+ prompt = prompt_template.format(text: text)
+
+ complete(
+ prompt: prompt,
+ # For the best temperature, topP, topK and maxOutputTokens for summarization, see
+ # https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-summarization
+ temperature: 0.2,
+ top_p: 0.95,
+ top_k: 40,
+ # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
+ max_output_tokens: 256
+ )
+ end
+
+ def chat(...)
+ # Chat params: https://cloud.google.com/vertex-ai/docs/samples/aiplatform-sdk-chat
+ # Example parameters: { "temperature": 0.3, "maxDecodeSteps": 200, "topP": 0.8, "topK": 40 }
+ raise NotImplementedError, "coming soon for Vertex AI.."
+ end
  end
  end
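Taken together, these methods make the Vertex AI provider usable through the same interface as the other LLMs. A minimal usage sketch, assuming application-default Google credentials are configured; the project id is illustrative:

```ruby
llm = Langchain::LLM::GoogleVertexAi.new(
  project_id: "my-gcp-project",              # example project id
  default_options: {region: "us-central1"}   # the only region wired up so far, per the constructor above
)

# complete and summarize return Langchain::LLM::GoogleVertexAiResponse objects
puts llm.complete(prompt: "Tell me about Vertex AI in one sentence").completion
puts llm.summarize(text: "A long article to condense ...").completion

llm.chat(prompt: "Hello")  # still raises NotImplementedError in 0.8.1
```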
@@ -9,10 +9,19 @@ module Langchain::LLM
  super(raw_response, model: model)
  end

+ def completion
+ # completions&.dig(0, "output")
+ raw_response.predictions[0]["content"]
+ end
+
  def embedding
  embeddings.first
  end

+ def completions
+ raw_response.predictions.map { |p| p["content"] }
+ end
+
  def total_tokens
  raw_response.dig(:predictions, 0, :embeddings, :statistics, :token_count)
  end
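A small sketch of how the new accessors relate, with `llm` as in the previous sketch (the return values are paraphrased, not captured output):

```ruby
response = llm.complete(prompt: "Name three Ruby web frameworks")
response.completion    # the first prediction's "content" string
response.completions   # every prediction's "content" string, as an Array

embedding_response = llm.embed(text: "foo bar")
embedding_response.embedding     # the embedding vector
embedding_response.total_tokens  # token count reported by the embeddings endpoint
```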
@@ -58,7 +58,7 @@ module Langchain::OutputParsers
  completion: completion,
  error: e
  )
- )
+ ).completion
  parser.parse(new_completion)
  end

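This is the retry path of the output-fixing parser: the repair prompt now goes back through the LLM and the text is extracted with `.completion` before re-parsing. For context, a hedged sketch of the calling pattern it supports; the JSON schema and the malformed input are illustrative:

```ruby
parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(
  {type: "object", properties: {name: {type: "string"}}, required: ["name"]}
)
fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(llm: llm, parser: parser)

# If the raw LLM output is malformed, the fixing parser asks the LLM to repair it
# (the code path changed above) and parses the repaired completion.
fix_parser.parse('{"name": "Kim Ji-hyun"')  # note the missing closing brace
```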
@@ -33,7 +33,7 @@ module Langchain::Prompt
  when ".json"
  config = JSON.parse(File.read(file_path))
  when ".yaml", ".yml"
- config = YAML.safe_load(File.read(file_path))
+ config = YAML.safe_load_file(file_path)
  else
  raise ArgumentError, "Got unsupported file type #{file_path.extname}"
  end
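`YAML.safe_load_file` parses the file directly instead of reading it into a string first; behaviour is otherwise unchanged. A small sketch of the call path this method serves, mirroring how the Vertex AI `summarize` method above loads its template (the file path is illustrative):

```ruby
prompt = Langchain::Prompt.load_from_path(
  file_path: "prompts/summarize_template.yaml"  # any .json, .yaml or .yml template (example path)
)
prompt.format(text: "Text to summarize ...")
```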
@@ -7,6 +7,7 @@ module Langchain::Vectorsearch
  # == Available vector databases
  #
  # - {Langchain::Vectorsearch::Chroma}
+ # - {Langchain::Vectorsearch::Epsilla}
  # - {Langchain::Vectorsearch::Elasticsearch}
  # - {Langchain::Vectorsearch::Hnswlib}
  # - {Langchain::Vectorsearch::Milvus}
@@ -29,10 +30,11 @@ module Langchain::Vectorsearch
  # )
  #
  # # You can instantiate other supported vector databases the same way:
+ # epsilla = Langchain::Vectorsearch::Epsilla.new(...)
  # milvus = Langchain::Vectorsearch::Milvus.new(...)
  # qdrant = Langchain::Vectorsearch::Qdrant.new(...)
  # pinecone = Langchain::Vectorsearch::Pinecone.new(...)
- # chrome = Langchain::Vectorsearch::Chroma.new(...)
+ # chroma = Langchain::Vectorsearch::Chroma.new(...)
  # pgvector = Langchain::Vectorsearch::Pgvector.new(...)
  #
  # == Schema Creation
@@ -0,0 +1,143 @@
+ # frozen_string_literal: true
+
+ require "securerandom"
+ require "json"
+ require "timeout"
+ require "uri"
+
+ module Langchain::Vectorsearch
+ class Epsilla < Base
+ #
+ # Wrapper around Epsilla client library
+ #
+ # Gem requirements:
+ # gem "epsilla-ruby", "~> 0.0.3"
+ #
+ # Usage:
+ # epsilla = Langchain::Vectorsearch::Epsilla.new(url:, db_name:, db_path:, index_name:, llm:)
+ #
+ # Initialize Epsilla client
+ # @param url [String] URL to connect to the Epsilla db instance, protocol://host:port
+ # @param db_name [String] The name of the database to use
+ # @param db_path [String] The path to the database to use
+ # @param index_name [String] The name of the Epsilla table to use
+ # @param llm [Object] The LLM client to use
+ def initialize(url:, db_name:, db_path:, index_name:, llm:)
+ depends_on "epsilla-ruby", req: "epsilla"
+
+ uri = URI.parse(url)
+ protocol = uri.scheme
+ host = uri.host
+ port = uri.port
+
+ @client = ::Epsilla::Client.new(protocol, host, port)
+
+ Timeout.timeout(5) do
+ status_code, response = @client.database.load_db(db_name, db_path)
+
+ if status_code != 200
+ if status_code == 500 && response["message"].include?("already loaded")
+ Langchain.logger.info("Database already loaded")
+ else
+ raise "Failed to load database: #{response}"
+ end
+ end
+ end
+
+ @client.database.use_db(db_name)
+
+ @db_name = db_name
+ @db_path = db_path
+ @table_name = index_name
+
+ @vector_dimension = llm.default_dimension
+
+ super(llm: llm)
+ end
+
+ # Create a table using the index_name passed in the constructor
+ def create_default_schema
+ status_code, response = @client.database.create_table(@table_name, [
+ {"name" => "ID", "dataType" => "STRING", "primaryKey" => true},
+ {"name" => "Doc", "dataType" => "STRING"},
+ {"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimension}
+ ])
+ raise "Failed to create table: #{response}" if status_code != 200
+
+ response
+ end
+
+ # Drop the table using the index_name passed in the constructor
+ def destroy_default_schema
+ status_code, response = @client.database.drop_table(@table_name)
+ raise "Failed to drop table: #{response}" if status_code != 200
+
+ response
+ end
+
+ # Add a list of texts to the database
+ # @param texts [Array<String>] The list of texts to add
+ # @param ids [Array<String>] The unique ids to add to the index, in the same order as the texts; if nil, it will be random uuids
+ def add_texts(texts:, ids: nil)
+ validated_ids = ids
+ if ids.nil?
+ validated_ids = texts.map { SecureRandom.uuid }
+ elsif ids.length != texts.length
+ raise "The number of ids must match the number of texts"
+ end
+
+ data = texts.map.with_index do |text, idx|
+ {Doc: text, Embedding: llm.embed(text: text).embedding, ID: validated_ids[idx]}
+ end
+
+ status_code, response = @client.database.insert(@table_name, data)
+ raise "Failed to insert texts: #{response}" if status_code != 200
+ response
+ end
+
+ # Search for similar texts
+ # @param query [String] The text to search for
+ # @param k [Integer] The number of results to return
+ # @return [String] The response from the server
+ def similarity_search(query:, k: 4)
+ embedding = llm.embed(text: query).embedding
+
+ similarity_search_by_vector(
+ embedding: embedding,
+ k: k
+ )
+ end
+
+ # Search for entries by embedding
+ # @param embedding [Array<Float>] The embedding to search for
+ # @param k [Integer] The number of results to return
+ # @return [String] The response from the server
+ def similarity_search_by_vector(embedding:, k: 4)
+ status_code, response = @client.database.query(@table_name, "Embedding", embedding, ["Doc"], k, false)
+ raise "Failed to do similarity search: #{response}" if status_code != 200
+
+ data = JSON.parse(response)["result"]
+ data.map { |result| result["Doc"] }
+ end
+
+ # Ask a question and return the answer
+ # @param question [String] The question to ask
+ # @param k [Integer] The number of results to have in context
+ # @yield [String] Stream responses back one String at a time
+ # @return [String] The answer to the question
+ def ask(question:, k: 4, &block)
+ search_results = similarity_search(query: question, k: k)
+
+ context = search_results.map do |result|
+ result.to_s
+ end
+ context = context.join("\n---\n")
+
+ prompt = generate_rag_prompt(question: question, context: context)
+
+ response = llm.chat(prompt: prompt, &block)
+ response.context = context
+ response
+ end
+ end
+ end
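End to end, the new adapter follows the same lifecycle as the other vectorsearch wrappers. A hedged usage sketch built from the methods above; the endpoint, names, and texts are illustrative:

```ruby
epsilla = Langchain::Vectorsearch::Epsilla.new(
  url: "http://localhost:8888",  # example Epsilla endpoint
  db_name: "my_db",
  db_path: "/tmp/my_db",
  index_name: "Documents",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

epsilla.create_default_schema                                  # ID / Doc / Embedding table
epsilla.add_texts(texts: ["Ruby was created by Matz.", "Epsilla is a vector database."])
epsilla.similarity_search(query: "Who created Ruby?", k: 2)    # => matching Doc strings
epsilla.ask(question: "Who created Ruby?")                     # RAG answer via the configured LLM
```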
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Langchain
- VERSION = "0.8.0"
+ VERSION = "0.8.1"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
- version: 0.8.0
+ version: 0.8.1
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-11-29 00:00:00.000000000 Z
+ date: 2023-12-07 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: baran
@@ -276,6 +276,20 @@ dependencies:
  - - "~>"
  - !ruby/object:Gem::Version
  version: 8.2.0
+ - !ruby/object:Gem::Dependency
+ name: epsilla-ruby
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.0.4
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.0.4
  - !ruby/object:Gem::Dependency
  name: eqn
  requirement: !ruby/object:Gem::Requirement
@@ -688,6 +702,7 @@ files:
  - lib/langchain/vectorsearch/base.rb
  - lib/langchain/vectorsearch/chroma.rb
  - lib/langchain/vectorsearch/elasticsearch.rb
+ - lib/langchain/vectorsearch/epsilla.rb
  - lib/langchain/vectorsearch/hnswlib.rb
  - lib/langchain/vectorsearch/milvus.rb
  - lib/langchain/vectorsearch/pgvector.rb