speaky 0.1.0 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 324621bdf6ab39e6bc0684802da3ab22ca135e84131ce203c013dfe4ecaf88ba
4
- data.tar.gz: c4bcfaa3aaa8b4e01847a28b575e18e4c7ea71a28ef01e21ddf90df03a158ec8
3
+ metadata.gz: 24debd882e7071392c95c5a0f916fd0354f933ced5ce94f6a260e11c9766d5f9
4
+ data.tar.gz: 9e2238fb22c50493a97cde8799c645582ecd4f1527d7f8420adeed4296165479
5
5
  SHA512:
6
- metadata.gz: cffab9177f8fa227a37fa8a863b2c3f27d0b9ceb21fca929939a38be44296b4afcc54100ac62bd68ae8584afda053e1b05bcf6d56ca09367489a1058e8934f9a
7
- data.tar.gz: 93021aa4edae4c59dda48956c8929bc14f5961342aea042d712a241cd286eaf4664458c4b490b6eae2b073efeb8324d98e559ba66d80d456f1b08bbdbdde5047
6
+ metadata.gz: '009c0303f434bcf2e15fef4f047da10db442ef9ec1a1d4c691e411ddfe34abe3a27e9820ae685978b49e62fdfe0af0c8554bd637a5b19cb709b90d7afd7a1c75'
7
+ data.tar.gz: 411eeca77146779120e94f6f62e9e69bad488ca4f8ea4f4838bfc586e902411143487fdeb5ddb35e02f80366a34f777bc64c7b0e9030077ba2c9c3b3d6e72135
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright 2024 Gregorio Galante
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,102 @@
1
+ # Speaky
2
+
3
+ **UNDER DEVELOPMENT**
4
+
5
+ Store ActiveRecord models in vector stores and query them with LLMs!
6
+
7
+ ## Installation
8
+
9
+ Add the gem to your Gemfile:
10
+
11
+ ```ruby
12
+ gem 'speaky'
13
+ ```
14
+
15
+ Install the gem:
16
+
17
+ ```bash
18
+ bundle install
19
+ ```
20
+
21
+ Create a new configuration initializer:
22
+
23
+ ```ruby
24
+ # config/initializers/speaky.rb
25
+
26
+ Speaky.configure do |config|
27
+ # Set the LLM type to use for querying the vector store.
28
+ # - 'openai' -> require 'ruby-openai' gem
29
+ config.llm_type = 'openai'
30
+
31
+ # Set the LLM configuration options.
32
+ # - for 'openai' LLMs, view https://github.com/alexrudall/ruby-openai for configuration options
33
+ config.llm_config = {
34
+ access_token: 'YOUR_ACCESS_TOKEN',
35
+ organization_id: 'YOUR_ORGANIZATION_ID',
36
+ }
37
+
38
+ # Set the vector store type to use for storing model instances.
39
+ # - 'qdrant' -> require 'qdrant-ruby' gem
40
+ config.vectorstore_type = 'qdrant'
41
+
42
+ # Set the vector store configuration options.
43
+ # - for 'qdrant' vector stores, :url, :api_key, :collection_name are required
44
+ config.vectorstore_config = {
45
+ url: 'YOUR_URL',
46
+ api_key: 'YOUR_API_KEY',
47
+ collection_name: 'YOUR_COLLECTION_NAME',
48
+ }
49
+ end
50
+ ```
51
+
52
+ ## Usage
53
+
54
+ To use the gem, include the `Speaky::Concern` module in your ActiveRecord models that should be stored in the vector store:
55
+
56
+ ```ruby
57
+ class MyModel < ApplicationRecord
58
+ include Speaky::Concern
59
+
60
+ def as_speaky
61
+ # This method should return a string representation of the model instance data that should be stored in the vector store. The default implementation is to call `to_json` on the model instance data.
62
+ end
63
+
64
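+ # Add only the callbacks you need to keep the vector store in sync. `save_for_speaky` runs on both create and update, so pair it with `destroy_for_speaky`, or use the separate create/update callbacks instead; registering all of them syncs the same record more than once per save.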
65
+ after_create :create_for_speaky
66
+ after_update :update_for_speaky
67
+ after_save :save_for_speaky
68
+ after_destroy :destroy_for_speaky
69
+ end
70
+ ```
71
+
72
+ To chat with the LLM, use the `Speaky.ask` method:
73
+
74
+ ```ruby
75
+ # Using default prompt template
76
+ Speaky.ask('What is the capital of France?')
77
+
78
+ # Using custom prompt template
79
+ Speaky.ask(
80
+ 'What is the capital of France?',
81
+ template: 'You are a chatbot. Please answer the following question: {{question}} using this context: {{context}}.'
82
+ )
83
+ ```
84
+
85
+ ## Development
86
+
87
+ 1. Clone the repo
88
+
89
+ 2. Install dependencies with `bundle install`
90
+
91
+ 3. Create your local `.env` file with `cp .env.example .env`
92
+
93
+ 4. Run the tests with `bundle exec rspec`
94
+
95
+ ### Publish a new version
96
+
97
+ 1. Update the version in `lib/speaky/version.rb`
98
+
99
+ 2. Run `ruby bin/publish`
100
+
101
+ ## License
102
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'active_support'
4
+
5
module Speaky
  # Mixin for ActiveRecord models whose data should be mirrored into the
  # configured vector store. The `*_for_speaky` methods are meant to be
  # registered as model callbacks.
  module Concern
    extend ActiveSupport::Concern

    # Text stored in the vector store for this record.
    # Defaults to the record's JSON representation; override in the model to customize.
    def as_speaky
      to_json
    end

    # Adds this record to the vector store.
    # On any StandardError the error is logged, a :base error is added to the
    # record and ActiveRecord::Rollback is raised.
    def create_for_speaky
      Speaky.vectorstore.add(id, as_speaky)
    rescue StandardError => error
      Rails.logger.error(error)
      errors.add(:base, 'Failed to create for speaky')
      raise ActiveRecord::Rollback
    end

    # Updates this record's entry in the vector store.
    # Failure handling mirrors #create_for_speaky.
    def update_for_speaky
      Speaky.vectorstore.update(id, as_speaky)
    rescue StandardError => error
      Rails.logger.error(error)
      errors.add(:base, 'Failed to update for speaky')
      raise ActiveRecord::Rollback
    end

    # Removes this record's entry from the vector store.
    # Failure handling mirrors #create_for_speaky.
    def destroy_for_speaky
      Speaky.vectorstore.remove(id)
    rescue StandardError => error
      Rails.logger.error(error)
      errors.add(:base, 'Failed to destroy for speaky')
      raise ActiveRecord::Rollback
    end

    # Creates or updates the vector store entry depending on whether the
    # record is new, inside an ActiveRecord transaction block.
    #
    # NOTE(review): when this runs from an after_save callback the record is
    # presumably already persisted, so the create branch may never be taken - confirm.
    # NOTE(review): ActiveRecord::Rollback raised inside a nested transaction
    # block is reportedly swallowed by that block without rolling back the
    # outer transaction; verify that failures here actually abort the save.
    def save_for_speaky
      ActiveRecord::Base.transaction do
        new_record? ? create_for_speaky : update_for_speaky
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Speaky
  # Holds the gem configuration: which LLM and vector store adapters to use
  # and the options handed to each of them.
  class Config
    # llm_type / llm_config:                 LLM adapter name and its options.
    # vectorstore_type / vectorstore_config: vector store adapter name and its options.
    attr_accessor :llm_type, :llm_config, :vectorstore_type, :vectorstore_config

    # Starts with the 'openai' LLM and the 'qdrant' vector store, both with
    # empty option hashes.
    def initialize
      @llm_type, @llm_config = 'openai', {}
      @vectorstore_type, @vectorstore_config = 'qdrant', {}
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Speaky
  # Abstract base class for LLM adapters. Subclasses must implement
  # #embed and #chat.
  class LlmBase
    # @return [Object] the configuration given to the constructor
    attr_reader :config

    # @param config [Object] adapter-specific configuration
    def initialize(config)
      @config = config
    end

    # Turns a text into its embedding.
    #
    # @param text [String] text to embed
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def embed(text)
      raise NotImplementedError, "#{self.class.name} must implement #embed"
    end

    # Sends a prompt to the LLM and returns its answer.
    #
    # @param prompt [String] prompt to send
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def chat(prompt)
      raise NotImplementedError, "#{self.class.name} must implement #chat"
    end
  end
end
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "openai"
4
+
5
module Speaky
  # LLM adapter that talks to OpenAI through an OpenAI::Client instance.
  class LlmOpenai < LlmBase
    # @param config [Hash] options passed as-is to OpenAI::Client.new;
    #   :access_token is required
    # @raise [RuntimeError] when :access_token is missing
    def initialize(config)
      @config = config

      # check if the access token is set
      raise "Openai access token is not set" unless @config[:access_token]

      # setup client
      @client = OpenAI::Client.new(@config)

      # setup embeddings params
      # NOTE: This is a hardcoded value for now but can be made configurable in the future by passing it in the config
      @embeddings_params = {
        model: 'text-embedding-3-small',
        dimensions: 1536
      }

      # setup chat params
      # NOTE: This is a hardcoded value for now but can be made configurable in the future by passing it in the config
      @chat_params = {
        model: "gpt-3.5-turbo",
        max_tokens: 1000
      }
    end

    # Requests the embedding of +text+.
    #
    # @param text [String] text to embed
    # @return [Object] the "embedding" value of the first "embedding" entry in the response
    # @raise [RuntimeError] when the response contains no embedding entry
    def embed(text)
      response = @client.embeddings(parameters: @embeddings_params.merge(input: text))

      # A failed request has no "data" list; fail with a readable error
      # instead of a NoMethodError on nil.
      entry = Array(response && response["data"]).find { |item| item["object"] == "embedding" }
      raise "Openai embeddings request failed: #{error_detail(response)}" unless entry

      entry["embedding"]
    end

    # Sends +prompt+ as a single user message and returns the reply text.
    #
    # @param prompt [String] prompt to send
    # @return [String, nil] content of the first choice's message
    # @raise [RuntimeError] when the response contains no choices
    def chat(prompt)
      messages = [{ role: "user", content: prompt }]
      response = @client.chat(parameters: @chat_params.merge(messages: messages))

      choice = Array(response && response["choices"]).first
      raise "Openai chat request failed: #{error_detail(response)}" unless choice

      choice.dig("message", "content")
    end

    private

    # Extracts a human-readable reason from a response that lacks the expected payload.
    def error_detail(response)
      error = response["error"] if response.is_a?(Hash)
      detail = error.is_a?(Hash) ? error["message"] : error
      detail || "unexpected response"
    end
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
module Speaky
  # Abstract base class for vector store adapters. Subclasses must implement
  # #add, #update, #remove and #query.
  class VectorstoreBase
    # @return [Object] the configuration given to the constructor
    attr_reader :config

    # @param config [Object] adapter-specific configuration
    def initialize(config)
      @config = config
    end

    # Add a vector to the vectorstore.
    # NOTE: If the vector already exists, it will be updated.
    #
    # @param id [Object] identifier of the vector
    # @param data [String] content to store
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def add(id, data)
      raise NotImplementedError, "#{self.class.name} must implement #add"
    end

    # Update a vector in the vectorstore.
    # NOTE: If the vector does not exist, it will be added.
    #
    # @param id [Object] identifier of the vector
    # @param data [String] content to store
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def update(id, data)
      raise NotImplementedError, "#{self.class.name} must implement #update"
    end

    # Remove a vector from the vectorstore.
    # NOTE: If the vector does not exist, it will be ignored.
    #
    # @param id [Object] identifier of the vector
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def remove(id)
      raise NotImplementedError, "#{self.class.name} must implement #remove"
    end

    # Look up the stored content that best matches a question.
    #
    # @param question [String] question to search for
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def query(question)
      raise NotImplementedError, "#{self.class.name} must implement #query"
    end
  end
end
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module Speaky
  # Placeholder for a Faiss-backed vector store. The constructor raises
  # immediately, so the code below the raise is a draft that never runs.
  class VectorstoreFaiss < VectorstoreBase
    # @param config [Hash] draft expects :index_path
    # @raise [RuntimeError] always, until the class is implemented
    def initialize(config)
      raise 'This class is not implemented yet.' # TEMP

      @config = config

      # check if the index path is set
      raise ArgumentError, 'index_path is required' unless @config[:index_path]

      # load index from index_path if exists
      if File.exist?(@config[:index_path])
        @index = Faiss::Index.load(@config[:index_path])
      else
        # create a new index; the dimension must equal the embedding size,
        # which is 1536 for the OpenAI embeddings and the Qdrant collection
        # used by this gem (the draft previously used 768)
        @index = Faiss::IndexFlatL2.new(1536)
        @index.save(@config[:index_path])
      end
    end

    # TODO: Implement the other methods
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'qdrant'
4
+
5
module Speaky
  # Vector store adapter backed by a Qdrant collection.
  class VectorstoreQdrant < VectorstoreBase
    # Connects to Qdrant, creates the collection when fetching it does not
    # succeed, and creates a keyword index on the "id" field.
    #
    # @param config [Hash] requires :url, :api_key and :collection_name
    # @raise [ArgumentError] when a required key is missing
    # @raise [RuntimeError] when the collection or the index cannot be created
    def initialize(config)
      @config = config

      # check if required fields are set
      %i[url api_key collection_name].each do |key|
        raise ArgumentError, "#{key} is required" unless @config[key]
      end

      # setup client
      @client = Qdrant::Client.new(url: @config[:url], api_key: @config[:api_key])

      ensure_collection
      ensure_id_index
    end

    # Embeds +data+ and upserts it as the point +id+, waiting for completion.
    #
    # @param id [Object] point identifier
    # @param data [String] content to embed and store in the payload
    # @return [true]
    # @raise [RuntimeError] when the upsert does not report status "ok"
    def add(id, data)
      response = @client.points.upsert(
        collection_name: @config[:collection_name],
        points: [
          { id: id, vector: Speaky.llm.embed(data), payload: { content: data } }
        ],
        wait: true
      )
      raise 'Failed to add vector' unless ok?(response)

      true
    end

    # Same as #add: the upsert replaces an existing point.
    def update(id, data)
      add(id, data)
    end

    # Deletes the point +id+.
    #
    # @param id [Object] point identifier
    # @return [true]
    # @raise [RuntimeError] when the delete does not report status "ok"
    def remove(id)
      response = @client.points.delete(
        collection_name: @config[:collection_name],
        points: [id]
      )
      raise 'Failed to remove vector' unless ok?(response)

      true
    end

    # Embeds +question+ and returns the stored content of the closest point.
    #
    # @param question [String] question to search for
    # @return [String] payload content of the best match, or an empty string
    #   when the search returns no point (or the point has no content)
    # @raise [RuntimeError] when the search does not report status "ok"
    def query(question)
      response = @client.points.search(
        collection_name: @config[:collection_name],
        limit: 1,
        vector: Speaky.llm.embed(question),
        with_payload: true,
        with_vector: false
      )
      raise 'Failed to search vectors' unless ok?(response)

      # An empty collection yields an empty result list; do not call dig on nil.
      best_match = Array(response.dig('result')).first
      best_match&.dig('payload', 'content') || ''
    end

    private

    # True when the Qdrant response is present and reports status "ok".
    def ok?(response)
      !!(response && response.dig('status') == 'ok')
    end

    # Creates the collection if fetching it does not succeed.
    def ensure_collection
      return if ok?(@client.collections.get(collection_name: @config[:collection_name]))

      created = @client.collections.create(
        collection_name: @config[:collection_name],
        vectors: { distance: "Cosine", size: 1536 }
      )
      raise 'Failed to create collection' unless ok?(created)
    end

    # Creates the keyword index for field "id" in the collection.
    def ensure_id_index
      indexed = @client.collections.create_index(
        collection_name: @config[:collection_name],
        field_name: 'id',
        field_schema: 'keyword'
      )
      raise 'Failed to create index for field "id" on collection' unless ok?(indexed)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Speaky
  # Version of the gem. Frozen explicitly so the constant cannot be mutated.
  VERSION = '0.1.2'.freeze
end
data/lib/speaky.rb ADDED
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "speaky/version"
4
+ require "speaky/config"
5
+ require "speaky/concern"
6
+
7
+ require "speaky/llm_base"
8
+ require "speaky/llm_openai"
9
+
10
+ require "speaky/vectorstore_base"
11
+ require "speaky/vectorstore_qdrant"
12
+ require "speaky/vectorstore_faiss"
13
+
14
module Speaky
  class << self
    # Returns the gem configuration, creating it on first access.
    #
    # Example of usage:
    #   Speaky.config.some_value
    def config
      @config ||= Config.new
    end

    # Yields the configuration instance so it can be modified in a block.
    #
    # Example of usage:
    #   Speaky.configure do |config|
    #     config.some_value = "some value"
    #   end
    def configure
      yield config
    end

    # Returns the vector store adapter selected by config.vectorstore_type,
    # building it on first access and reusing it afterwards.
    #
    # Example of usage:
    #   Speaky.vectorstore.method_name
    #
    # Raises RuntimeError when the configured type is not supported.
    def vectorstore
      @vectorstore ||=
        case config.vectorstore_type
        when "faiss" then VectorstoreFaiss.new(config.vectorstore_config)
        when "qdrant" then VectorstoreQdrant.new(config.vectorstore_config)
        else raise "Invalid vectorstore type"
        end
    end

    # Returns the LLM adapter selected by config.llm_type, building it on
    # first access and reusing it afterwards.
    #
    # Example of usage:
    #   Speaky.llm.method_name
    #
    # Raises RuntimeError when the configured type is not supported.
    def llm
      @llm ||=
        case config.llm_type
        when "openai" then LlmOpenai.new(config.llm_config)
        else raise "Invalid llm type"
        end
    end

    # Answers a question with the LLM, using the best match from the vector
    # store as context.
    #
    # question     - the question to ask.
    # template     - optional prompt template; "{{context}}" and "{{question}}"
    #                are replaced, as is "{{key}}" for every extra keyword.
    # other_params - extra placeholder values; converted with to_s.
    #
    # Returns whatever llm.chat returns for the generated prompt.
    #
    # Example of usage:
    #   Speaky.ask("What is the capital of France?")
    def ask(question, template: nil, **other_params)
      template ||= <<~TEMPLATE
        You are an AI assistant. You are asked a question and you provide an answer.
        Use the provided context to generate the answer to the question.

        Context:
        {{context}}

        Question:
        {{question}}
      TEMPLATE

      # load context
      context = vectorstore.query(question)

      # Built-in placeholders are assigned last so they win over extra
      # params that happen to be named :context or :question.
      replacements = other_params.each_with_object({}) do |(key, value), map|
        map["{{#{key}}}"] = value.to_s
      end
      replacements["{{context}}"] = context.to_s
      replacements["{{question}}"] = question.to_s

      # Single pass with the block form of gsub: values are inserted verbatim
      # (backslashes in JSON context are not treated as back-references) and
      # inserted text is never re-scanned for placeholders.
      prompt = template.gsub(Regexp.union(replacements.keys)) { |token| replacements[token] }

      # ask the question
      llm.chat(prompt)
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speaky
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregorio Galante
@@ -16,7 +16,18 @@ email:
16
16
  executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
- files: []
19
+ files:
20
+ - MIT-LICENSE
21
+ - README.md
22
+ - lib/speaky.rb
23
+ - lib/speaky/concern.rb
24
+ - lib/speaky/config.rb
25
+ - lib/speaky/llm_base.rb
26
+ - lib/speaky/llm_openai.rb
27
+ - lib/speaky/vectorstore_base.rb
28
+ - lib/speaky/vectorstore_faiss.rb
29
+ - lib/speaky/vectorstore_qdrant.rb
30
+ - lib/speaky/version.rb
20
31
  homepage: https://github.com/GAMS-Software/speaky
21
32
  licenses:
22
33
  - MIT