boxcars 0.2.10 → 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7d76e1223cfa17d11ab3670ebc6d43a8dbdc142e6aa85bd6bb6c230ccdea160
4
- data.tar.gz: 39172ff71234851bcef10c762ba29481b4dba199e851c37e8092794ed24604b1
3
+ metadata.gz: da3d8b9838602151837c0cc5bb9f3cae841ba24d1c338eade82c7807d913d4bb
4
+ data.tar.gz: f7be434c18f0ff2c95625fe32fae25f3a5df265331425d0c1f0430ab75761578
5
5
  SHA512:
6
- metadata.gz: b720092a75593e767564234f5990ccb7e57a382f7e5065c5cbaf9496ca0622ceb9dc63f07c10ff1ba305672c42817f94b371fadc3562b0043cfcde83ef2f1a8d
7
- data.tar.gz: 2ded919f3e0157d777b541589d48472b0cceb348f1f9bfeb18f63b29dad25a4d23183a34aa111db726c366bbe0b74283d34a1c0e14372b726448e9c69961dc65
6
+ metadata.gz: 57dd238c56f13f63a4665d4469efdabfa5f3c54f82cb6832c4158858d4b307a80c57f619633cdad6934d64186d560dcab7a62efa9adc727edfa61afbc5acc188
7
+ data.tar.gz: d2c782acf20c6b6b13cbfadf8f5406363b347be90a058626ec1bb21fe32baf1acb57a4a72c4770a7ad820700b465c0474a498080604e23e5d0270001d5d4aec1
data/CHANGELOG.md CHANGED
@@ -1,13 +1,25 @@
1
1
  # Changelog
2
2
 
3
- ## [Unreleased](https://github.com/BoxcarsAI/boxcars/tree/HEAD)
3
+ ## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
4
4
 
5
- [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...HEAD)
5
+ [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
6
6
 
7
7
  **Implemented enhancements:**
8
8
 
9
9
  - Notion Q&A [\#13](https://github.com/BoxcarsAI/boxcars/issues/13)
10
10
 
11
+ **Closed issues:**
12
+
13
+ - undefined method `default\_train' for Boxcars:Module \(ActiveRecord example\) [\#66](https://github.com/BoxcarsAI/boxcars/issues/66)
14
+ - Chore: reduce the number of markdown files in Notion DB folder [\#56](https://github.com/BoxcarsAI/boxcars/issues/56)
15
+
16
+ **Merged pull requests:**
17
+
18
+ - \[DRAFT\] Feature - add in memory vector store [\#65](https://github.com/BoxcarsAI/boxcars/pull/65) ([jaigouk](https://github.com/jaigouk))
19
+ - Chore - rename module name from Embeddings to VectorStores [\#63](https://github.com/BoxcarsAI/boxcars/pull/63) ([jaigouk](https://github.com/jaigouk))
20
+ - remove bunch of markdown files in Notion\_DB directory [\#62](https://github.com/BoxcarsAI/boxcars/pull/62) ([jaigouk](https://github.com/jaigouk))
21
+ - Fixed typo in README.md [\#61](https://github.com/BoxcarsAI/boxcars/pull/61) ([robmack](https://github.com/robmack))
22
+
11
23
  ## [v0.2.9](https://github.com/BoxcarsAI/boxcars/tree/v0.2.9) (2023-04-22)
12
24
 
13
25
  [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.8...v0.2.9)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.10)
4
+ boxcars (0.2.11)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
7
  ruby-openai (~> 3.0)
@@ -156,4 +156,4 @@ require "boxcars/boxcar/wikipedia_search"
156
156
  require "boxcars/boxcar/sql"
157
157
  require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
- require "boxcars/boxcar/vector_search"
159
+ require "boxcars/vector_search"
@@ -8,4 +8,4 @@ module Boxcars
8
8
  end
9
9
  end
10
10
 
11
- require "boxcars/boxcar/vector_store"
11
+ require "boxcars/vector_store"
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  class Document
6
6
  attr_accessor :page_content, :metadata
7
7
 
@@ -3,7 +3,7 @@
3
3
  require 'openai'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  class EmbedViaOpenAI
8
8
  include VectorStore
9
9
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  class EmbedViaTensorflow
6
6
  include VectorStore
7
7
  def call
@@ -5,7 +5,7 @@ require 'hnswlib'
5
5
  require 'json'
6
6
 
7
7
  module Boxcars
8
- module VectorStores
8
+ module VectorStore
9
9
  module Hnswlib
10
10
  class BuildVectorStore
11
11
  include VectorStore
@@ -76,7 +76,7 @@ module Boxcars
76
76
 
77
77
  docs = []
78
78
  data.each do |chunk|
79
- doc_output = Boxcars::VectorStores::SplitText.call(
79
+ doc_output = Boxcars::VectorStore::SplitText.call(
80
80
  separator: "\n", chunk_size: split_chunk_size, chunk_overlap: 0, text: chunk
81
81
  )
82
82
  docs.concat(doc_output)
@@ -98,7 +98,7 @@ module Boxcars
98
98
 
99
99
  puts "Initializing Store..."
100
100
  openai_client = Openai.open_ai_client
101
- embeddings_with_dim = Boxcars::VectorStores::EmbedViaOpenAI.call(texts: documents, client: openai_client)
101
+ embeddings_with_dim = Boxcars::VectorStore::EmbedViaOpenAI.call(texts: documents, client: openai_client)
102
102
  document_embeddings = embeddings_with_dim.map.with_index do |item, index|
103
103
  { doc_id: index, embedding: item[:embedding], document: documents[index] }
104
104
  end
@@ -110,7 +110,7 @@ module Boxcars
110
110
  return true unless rebuild_required?
111
111
 
112
112
  puts "Saving Vectorstore"
113
- Boxcars::VectorStores::Hnswlib::SaveToHnswlib.call(
113
+ Boxcars::VectorStore::Hnswlib::SaveToHnswlib.call(
114
114
  document_embeddings: embeddings_with_config[:document_embeddings],
115
115
  index_file_path: index_file_path,
116
116
  json_doc_file_path: json_doc_file_path,
@@ -121,7 +121,7 @@ module Boxcars
121
121
 
122
122
  def hnswlib_config(dim)
123
123
  # dim: length of datum point vector that will be indexed.
124
- Boxcars::VectorStores::Hnswlib::HnswlibConfig.new(
124
+ Boxcars::VectorStore::Hnswlib::HnswlibConfig.new(
125
125
  metric: "l2", max_item: 10000, dim: dim
126
126
  )
127
127
  end
@@ -3,7 +3,7 @@
3
3
  require 'json'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  module Hnswlib
8
8
  class HnswlibConfig
9
9
  attr_reader :metric, :max_item, :dim, :ef_construction, :m
@@ -4,7 +4,7 @@ require 'hnswlib'
4
4
  require 'json'
5
5
 
6
6
  module Boxcars
7
- module VectorStores
7
+ module VectorStore
8
8
  module Hnswlib
9
9
  class HnswlibSearch
10
10
  def initialize(vector_store:, options: {})
@@ -5,14 +5,14 @@ require 'json'
5
5
  require 'fileutils'
6
6
 
7
7
  module Boxcars
8
- module VectorStores
8
+ module VectorStore
9
9
  module Hnswlib
10
10
  class SaveToHnswlib
11
11
  include VectorStore
12
12
 
13
13
  # @param document_embeddings [Array] An array of hashes containing the document id, document text, and embedding.
14
14
  # @param index_file_path [String] The path to the index file.
15
- # @param hnswlib_config [Boxcars::VectorStores::Hnswlib::Config] The config object for the hnswlib index.
15
+ # @param hnswlib_config [Boxcars::VectorStore::Hnswlib::HnswlibConfig] The config object for the hnswlib index.
16
16
  # @option json_doc_file_path [String] Optional. The path to the json file containing the document text.
17
17
  def initialize(document_embeddings:, index_file_path:, hnswlib_config:, json_doc_file_path: nil)
18
18
  @document_embeddings = document_embeddings
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  module InMemory
6
6
  MemoryVector = Struct.new(:content, :embedding, :metadatax)
7
7
 
@@ -49,9 +49,9 @@ module Boxcars
49
49
  @embeddings_method ||=
50
50
  case @embedding_tool
51
51
  when :openai
52
- { klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
52
+ { klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
53
53
  when :tensorflow
54
- { klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
54
+ { klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
55
55
  end
56
56
  end
57
57
 
@@ -9,14 +9,14 @@
9
9
  # { page_content: "what's this", metadata: { a: 1 } },
10
10
  # ]
11
11
  #
12
- # vector_documents = Boxcars::VectorStores::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
12
+ # vector_documents = Boxcars::VectorStore::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
13
13
  #
14
- # result = Boxcars::VectorStores::InMemory::Search.call(vecotr_documents: vector_documents, query: "hello")
14
+ # result = Boxcars::VectorStore::InMemory::Search.call(vector_documents: vector_documents, query: "hello")
15
15
  #
16
- # expect(result).to eq(Boxcars::VectorStores::Document.new({ page_content: "hello", metadata: { a: 1 } }))
16
+ # expect(result).to eq(Boxcars::VectorStore::Document.new({ page_content: "hello", metadata: { a: 1 } }))
17
17
 
18
18
  module Boxcars
19
- module VectorStores
19
+ module VectorStore
20
20
  module InMemory
21
21
  class Search
22
22
  include VectorStore
@@ -63,9 +63,9 @@ module Boxcars
63
63
  def embeddings_method(embedding_tool)
64
64
  case embedding_tool
65
65
  when :openai
66
- { klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
66
+ { klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
67
67
  when :tensorflow
68
- { klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
68
+ { klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
69
69
  end
70
70
  end
71
71
 
@@ -3,7 +3,7 @@
3
3
  require 'hnswlib'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  class SimilaritySearch
8
8
  def initialize(embeddings:, vector_store:, openai_connection: nil, openai_access_token: nil)
9
9
  @embeddings = embeddings
@@ -32,13 +32,13 @@ module Boxcars
32
32
  end
33
33
 
34
34
  def convert_query_to_vector(query)
35
- Boxcars::VectorStores::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
35
+ Boxcars::VectorStore::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
36
36
  end
37
37
 
38
38
  def create_similarity_search_instance
39
39
  case vector_store
40
40
  when ::Hnswlib::HierarchicalNSW
41
- Boxcars::VectorStores::Hnswlib::HnswlibSearch.new(
41
+ Boxcars::VectorStore::Hnswlib::HnswlibSearch.new(
42
42
  vector_store: vector_store,
43
43
  options: { json_doc_path: embeddings, num_neighbors: 2 }
44
44
  )
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  # Split a text into chunks of a given size.
6
6
  class SplitText
7
7
  include VectorStore
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ module VectorStore
6
+ module ClassMethods
7
+ VectorStoreError = Class.new(StandardError)
8
+
9
+ def call(*args, **kw_args)
10
+ new(*args, **kw_args).call
11
+ end
12
+ end
13
+
14
+ def self.included(base)
15
+ base.extend(ClassMethods)
16
+
17
+ class << base
18
+ private :new
19
+ end
20
+ end
21
+ end
22
+ end
23
+
24
+ require_relative "vector_store/document"
25
+ require_relative "vector_store/embed_via_open_ai"
26
+ require_relative "vector_store/embed_via_tensorflow"
27
+ require_relative "vector_store/split_text"
28
+ require_relative "vector_store/similarity_search"
29
+ require_relative "vector_store/hnswlib/hnswlib_config"
30
+ require_relative "vector_store/hnswlib/save_to_hnswlib"
31
+ require_relative "vector_store/hnswlib/build_vector_store"
32
+ require_relative "vector_store/hnswlib/hnswlib_search"
33
+ require_relative "vector_store/in_memory/add_documents"
34
+ require_relative "vector_store/in_memory/search"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.2.10"
5
+ VERSION = "0.2.11"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.10
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -124,19 +124,6 @@ files:
124
124
  - lib/boxcars/boxcar/google_search.rb
125
125
  - lib/boxcars/boxcar/sql.rb
126
126
  - lib/boxcars/boxcar/swagger.rb
127
- - lib/boxcars/boxcar/vector_search.rb
128
- - lib/boxcars/boxcar/vector_store.rb
129
- - lib/boxcars/boxcar/vector_stores/document.rb
130
- - lib/boxcars/boxcar/vector_stores/embed_via_open_ai.rb
131
- - lib/boxcars/boxcar/vector_stores/embed_via_tensorflow.rb
132
- - lib/boxcars/boxcar/vector_stores/hnswlib/build_vector_store.rb
133
- - lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_config.rb
134
- - lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_search.rb
135
- - lib/boxcars/boxcar/vector_stores/hnswlib/save_to_hnswlib.rb
136
- - lib/boxcars/boxcar/vector_stores/in_memory/add_documents.rb
137
- - lib/boxcars/boxcar/vector_stores/in_memory/search.rb
138
- - lib/boxcars/boxcar/vector_stores/similarity_search.rb
139
- - lib/boxcars/boxcar/vector_stores/split_text.rb
140
127
  - lib/boxcars/boxcar/wikipedia_search.rb
141
128
  - lib/boxcars/conversation.rb
142
129
  - lib/boxcars/conversation_prompt.rb
@@ -152,6 +139,19 @@ files:
152
139
  - lib/boxcars/train/train_action.rb
153
140
  - lib/boxcars/train/train_finish.rb
154
141
  - lib/boxcars/train/zero_shot.rb
142
+ - lib/boxcars/vector_search.rb
143
+ - lib/boxcars/vector_store.rb
144
+ - lib/boxcars/vector_store/document.rb
145
+ - lib/boxcars/vector_store/embed_via_open_ai.rb
146
+ - lib/boxcars/vector_store/embed_via_tensorflow.rb
147
+ - lib/boxcars/vector_store/hnswlib/build_vector_store.rb
148
+ - lib/boxcars/vector_store/hnswlib/hnswlib_config.rb
149
+ - lib/boxcars/vector_store/hnswlib/hnswlib_search.rb
150
+ - lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb
151
+ - lib/boxcars/vector_store/in_memory/add_documents.rb
152
+ - lib/boxcars/vector_store/in_memory/search.rb
153
+ - lib/boxcars/vector_store/similarity_search.rb
154
+ - lib/boxcars/vector_store/split_text.rb
155
155
  - lib/boxcars/version.rb
156
156
  homepage: https://github.com/BoxcarsAI/boxcars
157
157
  licenses:
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Boxcars is a framework for running a series of tools to get an answer to a question.
4
- module Boxcars
5
- module VectorStore
6
- module ClassMethods
7
- VectorStoresError = Class.new(StandardError)
8
-
9
- def call(*args, **kw_args)
10
- new(*args, **kw_args).call
11
- end
12
- end
13
-
14
- def self.included(base)
15
- base.extend(ClassMethods)
16
-
17
- class << base
18
- private :new
19
- end
20
- end
21
- end
22
- end
23
-
24
- require_relative "vector_stores/document"
25
- require_relative "vector_stores/embed_via_open_ai"
26
- require_relative "vector_stores/embed_via_tensorflow"
27
- require_relative "vector_stores/split_text"
28
- require_relative "vector_stores/similarity_search"
29
- require_relative "vector_stores/hnswlib/hnswlib_config"
30
- require_relative "vector_stores/hnswlib/save_to_hnswlib"
31
- require_relative "vector_stores/hnswlib/build_vector_store"
32
- require_relative "vector_stores/hnswlib/hnswlib_search"
33
- require_relative "vector_stores/in_memory/add_documents"
34
- require_relative "vector_stores/in_memory/search"