boxcars 0.2.10 → 0.2.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -2
- data/Gemfile.lock +1 -1
- data/lib/boxcars/boxcar.rb +1 -1
- data/lib/boxcars/{boxcar/vector_search.rb → vector_search.rb} +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/document.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/embed_via_open_ai.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/embed_via_tensorflow.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/build_vector_store.rb +5 -5
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/hnswlib_config.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/hnswlib_search.rb +1 -1
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/hnswlib/save_to_hnswlib.rb +2 -2
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/in_memory/add_documents.rb +3 -3
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/in_memory/search.rb +6 -6
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/similarity_search.rb +3 -3
- data/lib/boxcars/{boxcar/vector_stores → vector_store}/split_text.rb +1 -1
- data/lib/boxcars/vector_store.rb +34 -0
- data/lib/boxcars/version.rb +1 -1
- metadata +14 -14
- data/lib/boxcars/boxcar/vector_store.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: da3d8b9838602151837c0cc5bb9f3cae841ba24d1c338eade82c7807d913d4bb
|
4
|
+
data.tar.gz: f7be434c18f0ff2c95625fe32fae25f3a5df265331425d0c1f0430ab75761578
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57dd238c56f13f63a4665d4469efdabfa5f3c54f82cb6832c4158858d4b307a80c57f619633cdad6934d64186d560dcab7a62efa9adc727edfa61afbc5acc188
|
7
|
+
data.tar.gz: d2c782acf20c6b6b13cbfadf8f5406363b347be90a058626ec1bb21fe32baf1acb57a4a72c4770a7ad820700b465c0474a498080604e23e5d0270001d5d4aec1
|
data/CHANGELOG.md
CHANGED
@@ -1,13 +1,25 @@
|
|
1
1
|
# Changelog
|
2
2
|
|
3
|
-
## [
|
3
|
+
## [v0.2.10](https://github.com/BoxcarsAI/boxcars/tree/v0.2.10) (2023-05-05)
|
4
4
|
|
5
|
-
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...
|
5
|
+
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.9...v0.2.10)
|
6
6
|
|
7
7
|
**Implemented enhancements:**
|
8
8
|
|
9
9
|
- Notion Q&A [\#13](https://github.com/BoxcarsAI/boxcars/issues/13)
|
10
10
|
|
11
|
+
**Closed issues:**
|
12
|
+
|
13
|
+
- undefined method `default\_train' for Boxcars:Module \(ActiveRecord example\) [\#66](https://github.com/BoxcarsAI/boxcars/issues/66)
|
14
|
+
- Chore: reduce the number of markdown files in Notion DB folder [\#56](https://github.com/BoxcarsAI/boxcars/issues/56)
|
15
|
+
|
16
|
+
**Merged pull requests:**
|
17
|
+
|
18
|
+
- \[DRAFT\] Feature - add in memory vector store [\#65](https://github.com/BoxcarsAI/boxcars/pull/65) ([jaigouk](https://github.com/jaigouk))
|
19
|
+
- Chore - rename module name from Embeddings to VectorStores [\#63](https://github.com/BoxcarsAI/boxcars/pull/63) ([jaigouk](https://github.com/jaigouk))
|
20
|
+
- remove bunch of markdown files in Notion\_DB directory [\#62](https://github.com/BoxcarsAI/boxcars/pull/62) ([jaigouk](https://github.com/jaigouk))
|
21
|
+
- Fixed typo in README.md [\#61](https://github.com/BoxcarsAI/boxcars/pull/61) ([robmack](https://github.com/robmack))
|
22
|
+
|
11
23
|
## [v0.2.9](https://github.com/BoxcarsAI/boxcars/tree/v0.2.9) (2023-04-22)
|
12
24
|
|
13
25
|
[Full Changelog](https://github.com/BoxcarsAI/boxcars/compare/v0.2.8...v0.2.9)
|
data/Gemfile.lock
CHANGED
data/lib/boxcars/boxcar.rb
CHANGED
@@ -5,7 +5,7 @@ require 'hnswlib'
|
|
5
5
|
require 'json'
|
6
6
|
|
7
7
|
module Boxcars
|
8
|
-
module VectorStores
|
8
|
+
module VectorStore
|
9
9
|
module Hnswlib
|
10
10
|
class BuildVectorStore
|
11
11
|
include VectorStore
|
@@ -76,7 +76,7 @@ module Boxcars
|
|
76
76
|
|
77
77
|
docs = []
|
78
78
|
data.each do |chunk|
|
79
|
-
doc_output = Boxcars::VectorStores::SplitText.call(
|
79
|
+
doc_output = Boxcars::VectorStore::SplitText.call(
|
80
80
|
separator: "\n", chunk_size: split_chunk_size, chunk_overlap: 0, text: chunk
|
81
81
|
)
|
82
82
|
docs.concat(doc_output)
|
@@ -98,7 +98,7 @@ module Boxcars
|
|
98
98
|
|
99
99
|
puts "Initializing Store..."
|
100
100
|
openai_client = Openai.open_ai_client
|
101
|
-
embeddings_with_dim = Boxcars::VectorStores::EmbedViaOpenAI.call(texts: documents, client: openai_client)
|
101
|
+
embeddings_with_dim = Boxcars::VectorStore::EmbedViaOpenAI.call(texts: documents, client: openai_client)
|
102
102
|
document_embeddings = embeddings_with_dim.map.with_index do |item, index|
|
103
103
|
{ doc_id: index, embedding: item[:embedding], document: documents[index] }
|
104
104
|
end
|
@@ -110,7 +110,7 @@ module Boxcars
|
|
110
110
|
return true unless rebuild_required?
|
111
111
|
|
112
112
|
puts "Saving Vectorstore"
|
113
|
-
Boxcars::VectorStores::Hnswlib::SaveToHnswlib.call(
|
113
|
+
Boxcars::VectorStore::Hnswlib::SaveToHnswlib.call(
|
114
114
|
document_embeddings: embeddings_with_config[:document_embeddings],
|
115
115
|
index_file_path: index_file_path,
|
116
116
|
json_doc_file_path: json_doc_file_path,
|
@@ -121,7 +121,7 @@ module Boxcars
|
|
121
121
|
|
122
122
|
def hnswlib_config(dim)
|
123
123
|
# dim: length of datum point vector that will be indexed.
|
124
|
-
Boxcars::VectorStores::Hnswlib::HnswlibConfig.new(
|
124
|
+
Boxcars::VectorStore::Hnswlib::HnswlibConfig.new(
|
125
125
|
metric: "l2", max_item: 10000, dim: dim
|
126
126
|
)
|
127
127
|
end
|
@@ -5,14 +5,14 @@ require 'json'
|
|
5
5
|
require 'fileutils'
|
6
6
|
|
7
7
|
module Boxcars
|
8
|
-
module VectorStores
|
8
|
+
module VectorStore
|
9
9
|
module Hnswlib
|
10
10
|
class SaveToHnswlib
|
11
11
|
include VectorStore
|
12
12
|
|
13
13
|
# @param document_embeddings [Array] An array of hashes containing the document id, document text, and embedding.
|
14
14
|
# @param index_file_path [String] The path to the index file.
|
15
|
-
# @param hnswlib_config [Boxcars::VectorStores::Hnswlib::Config] The config object for the hnswlib index.
|
15
|
+
# @param hnswlib_config [Boxcars::VectorStore::Hnswlib::Config] The config object for the hnswlib index.
|
16
16
|
# @option json_doc_file_path [String] Optional. The path to the json file containing the document text.
|
17
17
|
def initialize(document_embeddings:, index_file_path:, hnswlib_config:, json_doc_file_path: nil)
|
18
18
|
@document_embeddings = document_embeddings
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Boxcars
|
4
|
-
module VectorStores
|
4
|
+
module VectorStore
|
5
5
|
module InMemory
|
6
6
|
MemoryVector = Struct.new(:content, :embedding, :metadatax)
|
7
7
|
|
@@ -49,9 +49,9 @@ module Boxcars
|
|
49
49
|
@embeddings_method ||=
|
50
50
|
case @embedding_tool
|
51
51
|
when :openai
|
52
|
-
{ klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
|
52
|
+
{ klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
|
53
53
|
when :tensorflow
|
54
|
-
{ klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
|
54
|
+
{ klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
|
55
55
|
end
|
56
56
|
end
|
57
57
|
|
@@ -9,14 +9,14 @@
|
|
9
9
|
# { page_content: "what's this", metadata: { a: 1 } },
|
10
10
|
# ]
|
11
11
|
#
|
12
|
-
# vector_documents = Boxcars::VectorStores::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
|
12
|
+
# vector_documents = Boxcars::VectorStore::InMemory::AddDocuments.call(embedding_tool: :openai, documents: documents)
|
13
13
|
#
|
14
|
-
# result = Boxcars::VectorStores::InMemory::Search.call(vector_documents: vector_documents, query: "hello")
|
14
|
+
# result = Boxcars::VectorStore::InMemory::Search.call(vector_documents: vector_documents, query: "hello")
|
15
15
|
#
|
16
|
-
# expect(result).to eq(Boxcars::VectorStores::Document.new({ page_content: "hello", metadata: { a: 1 } }))
|
16
|
+
# expect(result).to eq(Boxcars::VectorStore::Document.new({ page_content: "hello", metadata: { a: 1 } }))
|
17
17
|
|
18
18
|
module Boxcars
|
19
|
-
module VectorStores
|
19
|
+
module VectorStore
|
20
20
|
module InMemory
|
21
21
|
class Search
|
22
22
|
include VectorStore
|
@@ -63,9 +63,9 @@ module Boxcars
|
|
63
63
|
def embeddings_method(embedding_tool)
|
64
64
|
case embedding_tool
|
65
65
|
when :openai
|
66
|
-
{ klass: Boxcars::VectorStores::EmbedViaOpenAI, client: openai_client }
|
66
|
+
{ klass: Boxcars::VectorStore::EmbedViaOpenAI, client: openai_client }
|
67
67
|
when :tensorflow
|
68
|
-
{ klass: Boxcars::VectorStores::EmbedViaTensorflow, client: nil }
|
68
|
+
{ klass: Boxcars::VectorStore::EmbedViaTensorflow, client: nil }
|
69
69
|
end
|
70
70
|
end
|
71
71
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'hnswlib'
|
4
4
|
|
5
5
|
module Boxcars
|
6
|
-
module VectorStores
|
6
|
+
module VectorStore
|
7
7
|
class SimilaritySearch
|
8
8
|
def initialize(embeddings:, vector_store:, openai_connection: nil, openai_access_token: nil)
|
9
9
|
@embeddings = embeddings
|
@@ -32,13 +32,13 @@ module Boxcars
|
|
32
32
|
end
|
33
33
|
|
34
34
|
def convert_query_to_vector(query)
|
35
|
-
Boxcars::VectorStores::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
|
35
|
+
Boxcars::VectorStore::EmbedViaOpenAI.call(texts: [query], client: openai_connection).first[:embedding]
|
36
36
|
end
|
37
37
|
|
38
38
|
def create_similarity_search_instance
|
39
39
|
case vector_store
|
40
40
|
when ::Hnswlib::HierarchicalNSW
|
41
|
-
Boxcars::VectorStores::Hnswlib::HnswlibSearch.new(
|
41
|
+
Boxcars::VectorStore::Hnswlib::HnswlibSearch.new(
|
42
42
|
vector_store: vector_store,
|
43
43
|
options: { json_doc_path: embeddings, num_neighbors: 2 }
|
44
44
|
)
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
+
module Boxcars
|
5
|
+
module VectorStore
|
6
|
+
module ClassMethods
|
7
|
+
VectorStoreError = Class.new(StandardError)
|
8
|
+
|
9
|
+
def call(*args, **kw_args)
|
10
|
+
new(*args, **kw_args).call
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
def self.included(base)
|
15
|
+
base.extend(ClassMethods)
|
16
|
+
|
17
|
+
class << base
|
18
|
+
private :new
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
require_relative "vector_store/document"
|
25
|
+
require_relative "vector_store/embed_via_open_ai"
|
26
|
+
require_relative "vector_store/embed_via_tensorflow"
|
27
|
+
require_relative "vector_store/split_text"
|
28
|
+
require_relative "vector_store/similarity_search"
|
29
|
+
require_relative "vector_store/hnswlib/hnswlib_config"
|
30
|
+
require_relative "vector_store/hnswlib/save_to_hnswlib"
|
31
|
+
require_relative "vector_store/hnswlib/build_vector_store"
|
32
|
+
require_relative "vector_store/hnswlib/hnswlib_search"
|
33
|
+
require_relative "vector_store/in_memory/add_documents"
|
34
|
+
require_relative "vector_store/in_memory/search"
|
data/lib/boxcars/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: boxcars
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.10
|
4
|
+
version: 0.2.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Francis Sullivan
|
@@ -124,19 +124,6 @@ files:
|
|
124
124
|
- lib/boxcars/boxcar/google_search.rb
|
125
125
|
- lib/boxcars/boxcar/sql.rb
|
126
126
|
- lib/boxcars/boxcar/swagger.rb
|
127
|
-
- lib/boxcars/boxcar/vector_search.rb
|
128
|
-
- lib/boxcars/boxcar/vector_store.rb
|
129
|
-
- lib/boxcars/boxcar/vector_stores/document.rb
|
130
|
-
- lib/boxcars/boxcar/vector_stores/embed_via_open_ai.rb
|
131
|
-
- lib/boxcars/boxcar/vector_stores/embed_via_tensorflow.rb
|
132
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/build_vector_store.rb
|
133
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_config.rb
|
134
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/hnswlib_search.rb
|
135
|
-
- lib/boxcars/boxcar/vector_stores/hnswlib/save_to_hnswlib.rb
|
136
|
-
- lib/boxcars/boxcar/vector_stores/in_memory/add_documents.rb
|
137
|
-
- lib/boxcars/boxcar/vector_stores/in_memory/search.rb
|
138
|
-
- lib/boxcars/boxcar/vector_stores/similarity_search.rb
|
139
|
-
- lib/boxcars/boxcar/vector_stores/split_text.rb
|
140
127
|
- lib/boxcars/boxcar/wikipedia_search.rb
|
141
128
|
- lib/boxcars/conversation.rb
|
142
129
|
- lib/boxcars/conversation_prompt.rb
|
@@ -152,6 +139,19 @@ files:
|
|
152
139
|
- lib/boxcars/train/train_action.rb
|
153
140
|
- lib/boxcars/train/train_finish.rb
|
154
141
|
- lib/boxcars/train/zero_shot.rb
|
142
|
+
- lib/boxcars/vector_search.rb
|
143
|
+
- lib/boxcars/vector_store.rb
|
144
|
+
- lib/boxcars/vector_store/document.rb
|
145
|
+
- lib/boxcars/vector_store/embed_via_open_ai.rb
|
146
|
+
- lib/boxcars/vector_store/embed_via_tensorflow.rb
|
147
|
+
- lib/boxcars/vector_store/hnswlib/build_vector_store.rb
|
148
|
+
- lib/boxcars/vector_store/hnswlib/hnswlib_config.rb
|
149
|
+
- lib/boxcars/vector_store/hnswlib/hnswlib_search.rb
|
150
|
+
- lib/boxcars/vector_store/hnswlib/save_to_hnswlib.rb
|
151
|
+
- lib/boxcars/vector_store/in_memory/add_documents.rb
|
152
|
+
- lib/boxcars/vector_store/in_memory/search.rb
|
153
|
+
- lib/boxcars/vector_store/similarity_search.rb
|
154
|
+
- lib/boxcars/vector_store/split_text.rb
|
155
155
|
- lib/boxcars/version.rb
|
156
156
|
homepage: https://github.com/BoxcarsAI/boxcars
|
157
157
|
licenses:
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# Boxcars is a framework for running a series of tools to get an answer to a question.
|
4
|
-
module Boxcars
|
5
|
-
module VectorStore
|
6
|
-
module ClassMethods
|
7
|
-
VectorStoresError = Class.new(StandardError)
|
8
|
-
|
9
|
-
def call(*args, **kw_args)
|
10
|
-
new(*args, **kw_args).call
|
11
|
-
end
|
12
|
-
end
|
13
|
-
|
14
|
-
def self.included(base)
|
15
|
-
base.extend(ClassMethods)
|
16
|
-
|
17
|
-
class << base
|
18
|
-
private :new
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
require_relative "vector_stores/document"
|
25
|
-
require_relative "vector_stores/embed_via_open_ai"
|
26
|
-
require_relative "vector_stores/embed_via_tensorflow"
|
27
|
-
require_relative "vector_stores/split_text"
|
28
|
-
require_relative "vector_stores/similarity_search"
|
29
|
-
require_relative "vector_stores/hnswlib/hnswlib_config"
|
30
|
-
require_relative "vector_stores/hnswlib/save_to_hnswlib"
|
31
|
-
require_relative "vector_stores/hnswlib/build_vector_store"
|
32
|
-
require_relative "vector_stores/hnswlib/hnswlib_search"
|
33
|
-
require_relative "vector_stores/in_memory/add_documents"
|
34
|
-
require_relative "vector_stores/in_memory/search"
|