boxcars 0.2.10 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7d76e1223cfa17d11ab3670ebc6d43a8dbdc142e6aa85bd6bb6c230ccdea160
4
- data.tar.gz: 39172ff71234851bcef10c762ba29481b4dba199e851c37e8092794ed24604b1
3
+ metadata.gz: da3d8b9838602151837c0cc5bb9f3cae841ba24d1c338eade82c7807d913d4bb
4
+ data.tar.gz: f7be434c18f0ff2c95625fe32fae25f3a5df265331425d0c1f0430ab75761578
5
5
  SHA512:
6
- metadata.gz: b720092a75593e767564234f5990ccb7e57a382f7e5065c5cbaf9496ca0622ceb9dc63f07c10ff1ba305672c42817f94b371fadc3562b0043cfcde83ef2f1a8d
7
- data.tar.gz: 2ded919f3e0157d777b541589d48472b0cceb348f1f9bfeb18f63b29dad25a4d23183a34aa111db726c366bbe0b74283d34a1c0e14372b726448e9c69961dc65
6
+ metadata.gz: 57dd238c56f13f63a4665d4469efdabfa5f3c54f82cb6832c4158858d4b307a80c57f619633cdad6934d64186d560dcab7a62efa9adc727edfa61afbc5acc188
7
+ data.tar.gz: d2c782acf20c6b6b13cbfadf8f5406363b347be90a058626ec1bb21fe32baf1acb57a4a72c4770a7ad820700b465c0474a498080604e23e5d0270001d5d4aec1
data/CHANGELOG.md CHANGED
@@ -1,13 +1,25 @@
1
1
  # Changelog
2
2
 
3
- ## [Unreleased](https://github.com/BoxcarsAI/boxcars/tree/HEAD)
3
+ ## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
4
4
 
5
- [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...HEAD)
5
+ [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
6
6
 
7
7
  **Implemented enhancements:**
8
8
 
9
9
  - Notion Q&A [\#13](https://github.com/BoxcarsAI/boxcars/issues/13)
10
10
 
11
+ **Closed issues:**
12
+
13
+ - undefined method `default\_train' for Boxcars:Module \(ActiveRecord example\) [\#66](https://github.com/BoxcarsAI/boxcars/issues/66)
14
+ - Chore: reduce the number of markdown files in Notion DB folder [\#56](https://github.com/BoxcarsAI/boxcars/issues/56)
15
+
16
+ **Merged pull requests:**
17
+
18
+ - \[DRAFT\] Feature - add in memory vector store [\#65](https://github.com/BoxcarsAI/boxcars/pull/65) ([jaigouk](https://github.com/jaigouk))
19
+ - Chore - rename module name from Embeddings to VectorStores [\#63](https://github.com/BoxcarsAI/boxcars/pull/63) ([jaigouk](https://github.com/jaigouk))
20
+ - remove bunch of markdown files in Notion\_DB directory [\#62](https://github.com/BoxcarsAI/boxcars/pull/62) ([jaigouk](https://github.com/jaigouk))
21
+ - Fixed typo in README.md [\#61](https://github.com/BoxcarsAI/boxcars/pull/61) ([robmack](https://github.com/robmack))
22
+
11
23
  ## [v0.2.9](https://github.com/BoxcarsAI/boxcars/tree/v0.2.9) (2023-04-22)
12
24
 
13
25
  [Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.8...v0.2.9)
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- boxcars (0.2.10)
4
+ boxcars (0.2.11)
5
5
  google_search_results (~> 2.2)
6
6
  gpt4all (~> 0.0.4)
7
7
  ruby-openai (~> 3.0)
@@ -156,4 +156,4 @@ require "boxcars/boxcar/wikipedia_search"
156
156
  require "boxcars/boxcar/sql"
157
157
  require "boxcars/boxcar/swagger"
158
158
  require "boxcars/boxcar/active_record"
159
- require "boxcars/boxcar/vector_search"
159
+ require "boxcars/vector_search"
@@ -8,4 +8,4 @@ module Boxcars
8
8
  end
9
9
  end
10
10
 
11
- require "boxcars/boxcar/vector_store"
11
+ require "boxcars/vector_store"
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  class Document
6
6
  attr_accessor :page_content, :metadata
7
7
 
@@ -3,7 +3,7 @@
3
3
  require 'openai'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  class EmbedViaOpenAI
8
8
  include VectorStore
9
9
 
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  class EmbedViaTensorflow
6
6
  include VectorStore
7
7
  def call
@@ -5,7 +5,7 @@ require 'hnswlib'
5
5
  require 'json'
6
6
 
7
7
  module Boxcars
8
- module VectorStores
8
+ module VectorStore
9
9
  module Hnswlib
10
10
  class BuildVectorStore
11
11
  include VectorStore
@@ -76,7 +76,7 @@ module Boxcars
76
76
 
77
77
  docs = []
78
78
  data.each do |chunk|
79
- doc_output = Boxcars::VectorStores::SplitText.call(
79
+ doc_output = Boxcars::VectorStore::SplitText.call(
80
80
  separator: "\n", chunk_size: split_chunk_size, chunk_overlap: 0, text: chunk
81
81
  )
82
82
  docs.concat(doc_output)
@@ -98,7 +98,7 @@ module Boxcars
98
98
 
99
99
  puts "Initializing Store..."
100
100
  openai_client = Openai.open_ai_client
101
- embeddings_with_dim = Boxcars::VectorStores::EmbedViaOpenAI.call(texts: documents, client: openai_client)
101
+ embeddings_with_dim = Boxcars::VectorStore::EmbedViaOpenAI.call(texts: documents, client: openai_client)
102
102
  document_embeddings = embeddings_with_dim.map.with_index do |item, index|
103
103
  { doc_id: index, embedding: item[:embedding], document: documents[index] }
104
104
  end
@@ -110,7 +110,7 @@ module Boxcars
110
110
  return true unless rebuild_required?
111
111
 
112
112
  puts "Saving Vectorstore"
113
- Boxcars::VectorStores::Hnswlib::SaveToHnswlib.call(
113
+ Boxcars::VectorStore::Hnswlib::SaveToHnswlib.call(
114
114
  document_embeddings: embeddings_with_config[:document_embeddings],
115
115
  index_file_path: index_file_path,
116
116
  json_doc_file_path: json_doc_file_path,
@@ -121,7 +121,7 @@ module Boxcars
121
121
 
122
122
  def hnswlib_config(dim)
123
123
  # dim: length of datum point vector that will be indexed.
124
- Boxcars::VectorStores::Hnswlib::HnswlibConfig.new(
124
+ Boxcars::VectorStore::Hnswlib::HnswlibConfig.new(
125
125
  metric: "l2", max_item: 10000, dim: dim
126
126
  )
127
127
  end
@@ -3,7 +3,7 @@
3
3
  require 'json'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  module Hnswlib
8
8
  class HnswlibConfig
9
9
  attr_reader :metric, :max_item, :dim, :ef_construction, :m
@@ -4,7 +4,7 @@ require 'hnswlib'
4
4
  require 'json'
5
5
 
6
6
  module Boxcars
7
- module VectorStores
7
+ module VectorStore
8
8
  module Hnswlib
9
9
  class HnswlibSearch
10
10
  def initialize(vector_store:, options: {})
@@ -5,14 +5,14 @@ require 'json'
5
5
  require 'fileutils'
6
6
 
7
7
  module Boxcars
8
- module VectorStores
8
+ module VectorStore
9
9
  module Hnswlib
10
10
  class SaveToHnswlib
11
11
  include VectorStore
12
12
 
13
13
  # @param document_embeddings [Array] An array of hashes containing the document id, document text, and embedding.
14
14
  # @param index_file_path [String] The path to the index file.
15
- # @param hnswlib_config [Boxcars::VectorStores::Hnswlib::Config] The config object for the hnswlib index.
15
+ # @param hnswlib_config [Boxcars::VectorStore::Hnswlib::Config] The config object for the hnswlib index.
16
16
  # @option json_doc_file_path [String] Optional. The path to the json file containing the document text.
17
17
  def initialize(document_embeddings:, index_file_path:, hnswlib_config:, json_doc_file_path: nil)
18
18
  @document_embeddings = document_embeddings
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  module InMemory
6
6
  MemoryVector = Struct.new(:content, :embedding, :metadatax)
7
7
 
@@ -49,9 +49,9 @@ module Boxcars
49
49
  @embeddings_method ||=
50
50
  case @embedding_tool
51
51
  when :openai
52
- { klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
52
+ { klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
53
53
  when :tensorflow
54
- { klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
54
+ { klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
55
55
  end
56
56
  end
57
57
 
@@ -9,14 +9,14 @@
9
9
  # { page_content: "what's this", metadata: { a: 1 } },
10
10
  # ]
11
11
  #
12
- # vector_documents = Boxcars::VectorStores::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
12
+ # vector_documents = Boxcars::VectorStore::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
13
13
  #
14
- # result = Boxcars::VectorStores::InMemory::Search.call(vecotr_documents: vector_documents, query: "hello")
14
+ # result = Boxcars::VectorStore::InMemory::Search.call(vecotr_documents: vector_documents, query: "hello")
15
15
  #
16
- # expect(result).to eq(Boxcars::VectorStores::Document.new({ page_content: "hello", metadata: { a: 1 } }))
16
+ # expect(result).to eq(Boxcars::VectorStore::Document.new({ page_content: "hello", metadata: { a: 1 } }))
17
17
 
18
18
  module Boxcars
19
- module VectorStores
19
+ module VectorStore
20
20
  module InMemory
21
21
  class Search
22
22
  include VectorStore
@@ -63,9 +63,9 @@ module Boxcars
63
63
  def embeddings_method(embedding_tool)
64
64
  case embedding_tool
65
65
  when :openai
66
- { klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
66
+ { klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
67
67
  when :tensorflow
68
- { klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
68
+ { klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
69
69
  end
70
70
  end
71
71
 
@@ -3,7 +3,7 @@
3
3
  require 'hnswlib'
4
4
 
5
5
  module Boxcars
6
- module VectorStores
6
+ module VectorStore
7
7
  class SimilaritySearch
8
8
  def initialize(embeddings:, vector_store:, openai_connection: nil, openai_access_token: nil)
9
9
  @embeddings = embeddings
@@ -32,13 +32,13 @@ module Boxcars
32
32
  end
33
33
 
34
34
  def convert_query_to_vector(query)
35
- Boxcars::VectorStores::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
35
+ Boxcars::VectorStore::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
36
36
  end
37
37
 
38
38
  def create_similarity_search_instance
39
39
  case vector_store
40
40
  when ::Hnswlib::HierarchicalNSW
41
- Boxcars::VectorStores::Hnswlib::HnswlibSearch.new(
41
+ Boxcars::VectorStore::Hnswlib::HnswlibSearch.new(
42
42
  vector_store: vector_store,
43
43
  options: { json_doc_path: embeddings, num_neighbors: 2 }
44
44
  )
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Boxcars
4
- module VectorStores
4
+ module VectorStore
5
5
  # Split a text into chunks of a given size.
6
6
  class SplitText
7
7
  include VectorStore
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Boxcars is a framework for running a series of tools to get an answer to a question.
4
+ module Boxcars
5
+ module VectorStore
6
+ module ClassMethods
7
+ VectorStoreError = Class.new(StandardError)
8
+
9
+ def call(*args, **kw_args)
10
+ new(*args, **kw_args).call
11
+ end
12
+ end
13
+
14
+ def self.included(base)
15
+ base.extend(ClassMethods)
16
+
17
+ class << base
18
+ private :new
19
+ end
20
+ end
21
+ end
22
+ end
23
+
24
+ require_relative "vector_store/document"
25
+ require_relative "vector_store/embed_via_open_ai"
26
+ require_relative "vector_store/embed_via_tensorflow"
27
+ require_relative "vector_store/split_text"
28
+ require_relative "vector_store/similarity_search"
29
+ require_relative "vector_store/hnswlib/hnswlib_config"
30
+ require_relative "vector_store/hnswlib/save_to_hnswlib"
31
+ require_relative "vector_store/hnswlib/build_vector_store"
32
+ require_relative "vector_store/hnswlib/hnswlib_search"
33
+ require_relative "vector_store/in_memory/add_documents"
34
+ require_relative "vector_store/in_memory/search"
@@ -2,5 +2,5 @@
2
2
 
3
3
  module Boxcars
4
4
  # The current version of the gem.
5
- VERSION = "0.2.10"
5
+ VERSION = "0.2.11"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boxcars
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.10
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Francis Sullivan
@@ -124,19 +124,6 @@ files:
124
124
  - lib/boxcars/boxcar/google_search.rb
125
125
  - lib/boxcars/boxcar/sql.rb
126
126
  - lib/boxcars/boxcar/swagger.rb
127
- - lib/boxcars/boxcar/vector_search.rb
128
- - lib/boxcars/boxcar/vector_store.rb
129
- - lib/boxcars/boxcar/vector_stores/document.rb
130
- - lib/boxcars/boxcar/vector_stores/embed_via_open_ai.rb
131
- - lib/boxcars/boxcar/vector_stores/embed_via_tensorflow.rb
132
- - lib/boxcars/boxcar/vector_stores/hnswlib/build_vector_store.rb
133
- - lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_config.rb
134
- - lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_search.rb
135
- - lib/boxcars/boxcar/vector_stores/hnswlib/save_to_hnswlib.rb
136
- - lib/boxcars/boxcar/vector_stores/in_memory/add_documents.rb
137
- - lib/boxcars/boxcar/vector_stores/in_memory/search.rb
138
- - lib/boxcars/boxcar/vector_stores/similarity_search.rb
139
- - lib/boxcars/boxcar/vector_stores/split_text.rb
140
127
  - lib/boxcars/boxcar/wikipedia_search.rb
141
128
  - lib/boxcars/conversation.rb
142
129
  - lib/boxcars/conversation_prompt.rb
@@ -152,6 +139,19 @@ files:
152
139
  - lib/boxcars/train/train_action.rb
153
140
  - lib/boxcars/train/train_finish.rb
154
141
  - lib/boxcars/train/zero_shot.rb
142
+ - lib/boxcars/vector_search.rb
143
+ - lib/boxcars/vector_store.rb
144
+ - lib/boxcars/vector_store/document.rb
145
+ - lib/boxcars/vector_store/embed_via_open_ai.rb
146
+ - lib/boxcars/vector_store/embed_via_tensorflow.rb
147
+ - lib/boxcars/vector_store/hnswlib/build_vector_store.rb
148
+ - lib/boxcars/vector_store/hnswlib/hnswlib_config.rb
149
+ - lib/boxcars/vector_store/hnswlib/hnswlib_search.rb
150
+ - lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb
151
+ - lib/boxcars/vector_store/in_memory/add_documents.rb
152
+ - lib/boxcars/vector_store/in_memory/search.rb
153
+ - lib/boxcars/vector_store/similarity_search.rb
154
+ - lib/boxcars/vector_store/split_text.rb
155
155
  - lib/boxcars/version.rb
156
156
  homepage: https://github.com/BoxcarsAI/boxcars
157
157
  licenses:
@@ -1,34 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # Boxcars is a framework for running a series of tools to get an answer to a question.
4
- module Boxcars
5
- module VectorStore
6
- module ClassMethods
7
- VectorStoresError = Class.new(StandardError)
8
-
9
- def call(*args, **kw_args)
10
- new(*args, **kw_args).call
11
- end
12
- end
13
-
14
- def self.included(base)
15
- base.extend(ClassMethods)
16
-
17
- class << base
18
- private :new
19
- end
20
- end
21
- end
22
- end
23
-
24
- require_relative "vector_stores/document"
25
- require_relative "vector_stores/embed_via_open_ai"
26
- require_relative "vector_stores/embed_via_tensorflow"
27
- require_relative "vector_stores/split_text"
28
- require_relative "vector_stores/similarity_search"
29
- require_relative "vector_stores/hnswlib/hnswlib_config"
30
- require_relative "vector_stores/hnswlib/save_to_hnswlib"
31
- require_relative "vector_stores/hnswlib/build_vector_store"
32
- require_relative "vector_stores/hnswlib/hnswlib_search"
33
- require_relative "vector_stores/in_memory/add_documents"
34
- require_relative "vector_stores/in_memory/search"