boxcars 0.2.10 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -2
- data/Gemfile.lock +1 -1
- data/lib/boxcars/boxcar.rb +1 -1
- data/lib/boxcars/{boxcar/vector_search.rb → vector_search.rb} +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/document.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/embed_via_open_ai.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/embed_via_tensorflow.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/build_vector_store.rb +5 -5
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/hnswlib_config.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/hnswlib_search.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/save_to_hnswlib.rb +2 -2
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/in_memory/add_documents.rb +3 -3
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/in_memory/search.rb +6 -6
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/similarity_search.rb +3 -3
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/split_text.rb +1 -1
- data/lib/boxcars/vector_store.rb +34 -0
- data/lib/boxcars/version.rb +1 -1
- metadata +14 -14
- data/lib/boxcars/boxcar/vector_store.rb +0 -34
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: da3d8b9838602151837c0cc5bb9f3cae841ba24d1c338eade82c7807d913d4bb
|
|
4
|
+
data.tar.gz: f7be434c18f0ff2c95625fe32fae25f3a5df265331425d0c1f0430ab75761578
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 57dd238c56f13f63a4665d4469efdabfa5f3c54f82cb6832c4158858d4b307a80c57f619633cdad6934d64186d560dcab7a62efa9adc727edfa61afbc5acc188
|
|
7
|
+
data.tar.gz: d2c782acf20c6b6b13cbfadf8f5406363b347be90a058626ec1bb21fe32baf1acb57a4a72c4770a7ad820700b465c0474a498080604e23e5d0270001d5d4aec1
|
data/CHANGELOG.md
CHANGED
|
@@ -1,13 +1,25 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
-
## [
|
|
3
|
+
## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
|
|
4
4
|
|
|
5
|
-
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...
|
|
5
|
+
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
|
|
6
6
|
|
|
7
7
|
**Implemented enhancements:**
|
|
8
8
|
|
|
9
9
|
- Notion Q&A [\#13](https://github.com/BoxcarsAI/boxcars/issues/13)
|
|
10
10
|
|
|
11
|
+
**Closed issues:**
|
|
12
|
+
|
|
13
|
+
- undefined method `default\_train' for Boxcars:Module \(ActiveRecord example\) [\#66](https://github.com/BoxcarsAI/boxcars/issues/66)
|
|
14
|
+
- Chore: reduce the number of markdown files in Notion DB folder [\#56](https://github.com/BoxcarsAI/boxcars/issues/56)
|
|
15
|
+
|
|
16
|
+
**Merged pull requests:**
|
|
17
|
+
|
|
18
|
+
- \[DRAFT\] Feature - add in memory vector store [\#65](https://github.com/BoxcarsAI/boxcars/pull/65) ([jaigouk](https://github.com/jaigouk))
|
|
19
|
+
- Chore - rename module name from Embeddings to VectorStores [\#63](https://github.com/BoxcarsAI/boxcars/pull/63) ([jaigouk](https://github.com/jaigouk))
|
|
20
|
+
- remove bunch of markdown files in Notion\_DB directory [\#62](https://github.com/BoxcarsAI/boxcars/pull/62) ([jaigouk](https://github.com/jaigouk))
|
|
21
|
+
- Fixed typo in README.md [\#61](https://github.com/BoxcarsAI/boxcars/pull/61) ([robmack](https://github.com/robmack))
|
|
22
|
+
|
|
11
23
|
## [v0.2.9](https://github.com/BoxcarsAI/boxcars/tree/v0.2.9) (2023-04-22)
|
|
12
24
|
|
|
13
25
|
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.8...v0.2.9)
|
data/Gemfile.lock
CHANGED
data/lib/boxcars/boxcar.rb
CHANGED
|
@@ -5,7 +5,7 @@ require 'hnswlib'
|
|
|
5
5
|
require 'json'
|
|
6
6
|
|
|
7
7
|
module Boxcars
|
|
8
|
-
module
|
|
8
|
+
module VectorStore
|
|
9
9
|
module Hnswlib
|
|
10
10
|
class BuildVectorStore
|
|
11
11
|
include VectorStore
|
|
@@ -76,7 +76,7 @@ module Boxcars
|
|
|
76
76
|
|
|
77
77
|
docs = []
|
|
78
78
|
data.each do |chunk|
|
|
79
|
-
doc_output = Boxcars::
|
|
79
|
+
doc_output = Boxcars::VectorStore::SplitText.call(
|
|
80
80
|
separator: "\n", chunk_size: split_chunk_size, chunk_overlap: 0, text: chunk
|
|
81
81
|
)
|
|
82
82
|
docs.concat(doc_output)
|
|
@@ -98,7 +98,7 @@ module Boxcars
|
|
|
98
98
|
|
|
99
99
|
puts "Initializing Store..."
|
|
100
100
|
openai_client = Openai.open_ai_client
|
|
101
|
-
embeddings_with_dim = Boxcars::
|
|
101
|
+
embeddings_with_dim = Boxcars::VectorStore::EmbedViaOpenAI.call(texts: documents, client: openai_client)
|
|
102
102
|
document_embeddings = embeddings_with_dim.map.with_index do |item, index|
|
|
103
103
|
{ doc_id: index, embedding: item[:embedding], document: documents[index] }
|
|
104
104
|
end
|
|
@@ -110,7 +110,7 @@ module Boxcars
|
|
|
110
110
|
return true unless rebuild_required?
|
|
111
111
|
|
|
112
112
|
puts "Saving Vectorstore"
|
|
113
|
-
Boxcars::
|
|
113
|
+
Boxcars::VectorStore::Hnswlib::SaveToHnswlib.call(
|
|
114
114
|
document_embeddings: embeddings_with_config[:document_embeddings],
|
|
115
115
|
index_file_path: index_file_path,
|
|
116
116
|
json_doc_file_path: json_doc_file_path,
|
|
@@ -121,7 +121,7 @@ module Boxcars
|
|
|
121
121
|
|
|
122
122
|
def hnswlib_config(dim)
|
|
123
123
|
# dim: length of datum point vector that will be indexed.
|
|
124
|
-
Boxcars::
|
|
124
|
+
Boxcars::VectorStore::Hnswlib::HnswlibConfig.new(
|
|
125
125
|
metric: "l2", max_item: 10000, dim: dim
|
|
126
126
|
)
|
|
127
127
|
end
|
|
@@ -5,14 +5,14 @@ require 'json'
|
|
|
5
5
|
require 'fileutils'
|
|
6
6
|
|
|
7
7
|
module Boxcars
|
|
8
|
-
module
|
|
8
|
+
module VectorStore
|
|
9
9
|
module Hnswlib
|
|
10
10
|
class SaveToHnswlib
|
|
11
11
|
include VectorStore
|
|
12
12
|
|
|
13
13
|
# @param document_embeddings [Array] An array of hashes containing the document id, document text, and embedding.
|
|
14
14
|
# @param index_file_path [String] The path to the index file.
|
|
15
|
-
# @param hnswlib_config [Boxcars::
|
|
15
|
+
# @param hnswlib_config [Boxcars::VectorStore::Hnswlib::HnswlibConfig] The config object for the hnswlib index.
|
|
16
16
|
# @option json_doc_file_path [String] Optional. The path to the json file containing the document text.
|
|
17
17
|
def initialize(document_embeddings:, index_file_path:, hnswlib_config:, json_doc_file_path: nil)
|
|
18
18
|
@document_embeddings = document_embeddings
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Boxcars
|
|
4
|
-
module
|
|
4
|
+
module VectorStore
|
|
5
5
|
module InMemory
|
|
6
6
|
MemoryVector = Struct.new(:content, :embedding, :metadatax)
|
|
7
7
|
|
|
@@ -49,9 +49,9 @@ module Boxcars
|
|
|
49
49
|
@embeddings_method ||=
|
|
50
50
|
case @embedding_tool
|
|
51
51
|
when :openai
|
|
52
|
-
{ klass: Boxcars::
|
|
52
|
+
{ klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
|
|
53
53
|
when :tensorflow
|
|
54
|
-
{ klass: Boxcars::
|
|
54
|
+
{ klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
|
|
55
55
|
end
|
|
56
56
|
end
|
|
57
57
|
|
|
@@ -9,14 +9,14 @@
|
|
|
9
9
|
# { page_content: "what's this", metadata: { a: 1 } },
|
|
10
10
|
# ]
|
|
11
11
|
#
|
|
12
|
-
# vector_documents = Boxcars::
|
|
12
|
+
# vector_documents = Boxcars::VectorStore::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
|
|
13
13
|
#
|
|
14
|
-
# result = Boxcars::
|
|
14
|
+
# result = Boxcars::VectorStore::InMemory::Search.call(vector_documents: vector_documents, query: "hello")
|
|
15
15
|
#
|
|
16
|
-
# expect(result).to eq(Boxcars::
|
|
16
|
+
# expect(result).to eq(Boxcars::VectorStore::Document.new({ page_content: "hello", metadata: { a: 1 } }))
|
|
17
17
|
|
|
18
18
|
module Boxcars
|
|
19
|
-
module
|
|
19
|
+
module VectorStore
|
|
20
20
|
module InMemory
|
|
21
21
|
class Search
|
|
22
22
|
include VectorStore
|
|
@@ -63,9 +63,9 @@ module Boxcars
|
|
|
63
63
|
def embeddings_method(embedding_tool)
|
|
64
64
|
case embedding_tool
|
|
65
65
|
when :openai
|
|
66
|
-
{ klass: Boxcars::
|
|
66
|
+
{ klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
|
|
67
67
|
when :tensorflow
|
|
68
|
-
{ klass: Boxcars::
|
|
68
|
+
{ klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
|
|
69
69
|
end
|
|
70
70
|
end
|
|
71
71
|
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
require 'hnswlib'
|
|
4
4
|
|
|
5
5
|
module Boxcars
|
|
6
|
-
module
|
|
6
|
+
module VectorStore
|
|
7
7
|
class SimilaritySearch
|
|
8
8
|
def initialize(embeddings:, vector_store:, openai_connection: nil, openai_access_token: nil)
|
|
9
9
|
@embeddings = embeddings
|
|
@@ -32,13 +32,13 @@ module Boxcars
|
|
|
32
32
|
end
|
|
33
33
|
|
|
34
34
|
def convert_query_to_vector(query)
|
|
35
|
-
Boxcars::
|
|
35
|
+
Boxcars::VectorStore::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
def create_similarity_search_instance
|
|
39
39
|
case vector_store
|
|
40
40
|
when ::Hnswlib::HierarchicalNSW
|
|
41
|
-
Boxcars::
|
|
41
|
+
Boxcars::VectorStore::Hnswlib::HnswlibSearch.new(
|
|
42
42
|
vector_store: vector_store,
|
|
43
43
|
options: { json_doc_path: embeddings, num_neighbors: 2 }
|
|
44
44
|
)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
|
4
|
+
module Boxcars
|
|
5
|
+
module VectorStore
|
|
6
|
+
module ClassMethods
|
|
7
|
+
VectorStoreError = Class.new(StandardError)
|
|
8
|
+
|
|
9
|
+
def call(*args, **kw_args)
|
|
10
|
+
new(*args, **kw_args).call
|
|
11
|
+
end
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def self.included(base)
|
|
15
|
+
base.extend(ClassMethods)
|
|
16
|
+
|
|
17
|
+
class << base
|
|
18
|
+
private :new
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
require_relative "vector_store/document"
|
|
25
|
+
require_relative "vector_store/embed_via_open_ai"
|
|
26
|
+
require_relative "vector_store/embed_via_tensorflow"
|
|
27
|
+
require_relative "vector_store/split_text"
|
|
28
|
+
require_relative "vector_store/similarity_search"
|
|
29
|
+
require_relative "vector_store/hnswlib/hnswlib_config"
|
|
30
|
+
require_relative "vector_store/hnswlib/save_to_hnswlib"
|
|
31
|
+
require_relative "vector_store/hnswlib/build_vector_store"
|
|
32
|
+
require_relative "vector_store/hnswlib/hnswlib_search"
|
|
33
|
+
require_relative "vector_store/in_memory/add_documents"
|
|
34
|
+
require_relative "vector_store/in_memory/search"
|
data/lib/boxcars/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: boxcars
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.11
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Francis Sullivan
|
|
@@ -124,19 +124,6 @@ files:
|
|
|
124
124
|
- lib/boxcars/boxcar/google_search.rb
|
|
125
125
|
- lib/boxcars/boxcar/sql.rb
|
|
126
126
|
- lib/boxcars/boxcar/swagger.rb
|
|
127
|
-
- lib/boxcars/boxcar/vector_search.rb
|
|
128
|
-
- lib/boxcars/boxcar/vector_store.rb
|
|
129
|
-
- lib/boxcars/boxcar/vector_stores/document.rb
|
|
130
|
-
- lib/boxcars/boxcar/vector_stores/embed_via_open_ai.rb
|
|
131
|
-
- lib/boxcars/boxcar/vector_stores/embed_via_tensorflow.rb
|
|
132
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/build_vector_store.rb
|
|
133
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_config.rb
|
|
134
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_search.rb
|
|
135
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/save_to_hnswlib.rb
|
|
136
|
-
- lib/boxcars/boxcar/vector_stores/in_memory/add_documents.rb
|
|
137
|
-
- lib/boxcars/boxcar/vector_stores/in_memory/search.rb
|
|
138
|
-
- lib/boxcars/boxcar/vector_stores/similarity_search.rb
|
|
139
|
-
- lib/boxcars/boxcar/vector_stores/split_text.rb
|
|
140
127
|
- lib/boxcars/boxcar/wikipedia_search.rb
|
|
141
128
|
- lib/boxcars/conversation.rb
|
|
142
129
|
- lib/boxcars/conversation_prompt.rb
|
|
@@ -152,6 +139,19 @@ files:
|
|
|
152
139
|
- lib/boxcars/train/train_action.rb
|
|
153
140
|
- lib/boxcars/train/train_finish.rb
|
|
154
141
|
- lib/boxcars/train/zero_shot.rb
|
|
142
|
+
- lib/boxcars/vector_search.rb
|
|
143
|
+
- lib/boxcars/vector_store.rb
|
|
144
|
+
- lib/boxcars/vector_store/document.rb
|
|
145
|
+
- lib/boxcars/vector_store/embed_via_open_ai.rb
|
|
146
|
+
- lib/boxcars/vector_store/embed_via_tensorflow.rb
|
|
147
|
+
- lib/boxcars/vector_store/hnswlib/build_vector_store.rb
|
|
148
|
+
- lib/boxcars/vector_store/hnswlib/hnswlib_config.rb
|
|
149
|
+
- lib/boxcars/vector_store/hnswlib/hnswlib_search.rb
|
|
150
|
+
- lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb
|
|
151
|
+
- lib/boxcars/vector_store/in_memory/add_documents.rb
|
|
152
|
+
- lib/boxcars/vector_store/in_memory/search.rb
|
|
153
|
+
- lib/boxcars/vector_store/similarity_search.rb
|
|
154
|
+
- lib/boxcars/vector_store/split_text.rb
|
|
155
155
|
- lib/boxcars/version.rb
|
|
156
156
|
homepage: https://github.com/BoxcarsAI/boxcars
|
|
157
157
|
licenses:
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
|
4
|
-
module Boxcars
|
|
5
|
-
module VectorStore
|
|
6
|
-
module ClassMethods
|
|
7
|
-
VectorStoresError = Class.new(StandardError)
|
|
8
|
-
|
|
9
|
-
def call(*args, **kw_args)
|
|
10
|
-
new(*args, **kw_args).call
|
|
11
|
-
end
|
|
12
|
-
end
|
|
13
|
-
|
|
14
|
-
def self.included(base)
|
|
15
|
-
base.extend(ClassMethods)
|
|
16
|
-
|
|
17
|
-
class << base
|
|
18
|
-
private :new
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
require_relative "vector_stores/document"
|
|
25
|
-
require_relative "vector_stores/embed_via_open_ai"
|
|
26
|
-
require_relative "vector_stores/embed_via_tensorflow"
|
|
27
|
-
require_relative "vector_stores/split_text"
|
|
28
|
-
require_relative "vector_stores/similarity_search"
|
|
29
|
-
require_relative "vector_stores/hnswlib/hnswlib_config"
|
|
30
|
-
require_relative "vector_stores/hnswlib/save_to_hnswlib"
|
|
31
|
-
require_relative "vector_stores/hnswlib/build_vector_store"
|
|
32
|
-
require_relative "vector_stores/hnswlib/hnswlib_search"
|
|
33
|
-
require_relative "vector_stores/in_memory/add_documents"
|
|
34
|
-
require_relative "vector_stores/in_memory/search"
|