boxcars 0.7.7 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -3
- data/.ruby-version +1 -1
- data/Gemfile +3 -13
- data/Gemfile.lock +29 -25
- data/POSTHOG_TEST_README.md +118 -0
- data/README.md +305 -0
- data/boxcars.gemspec +1 -2
- data/lib/boxcars/boxcar/active_record.rb +9 -10
- data/lib/boxcars/boxcar/calculator.rb +2 -2
- data/lib/boxcars/boxcar/engine_boxcar.rb +4 -4
- data/lib/boxcars/boxcar/google_search.rb +2 -2
- data/lib/boxcars/boxcar/json_engine_boxcar.rb +1 -1
- data/lib/boxcars/boxcar/ruby_calculator.rb +1 -1
- data/lib/boxcars/boxcar/sql_base.rb +4 -4
- data/lib/boxcars/boxcar/swagger.rb +3 -3
- data/lib/boxcars/boxcar/vector_answer.rb +3 -3
- data/lib/boxcars/boxcar/xml_engine_boxcar.rb +1 -1
- data/lib/boxcars/boxcar.rb +6 -6
- data/lib/boxcars/conversation_prompt.rb +3 -3
- data/lib/boxcars/engine/anthropic.rb +121 -23
- data/lib/boxcars/engine/cerebras.rb +2 -2
- data/lib/boxcars/engine/cohere.rb +135 -9
- data/lib/boxcars/engine/gemini_ai.rb +151 -76
- data/lib/boxcars/engine/google.rb +2 -2
- data/lib/boxcars/engine/gpt4all_eng.rb +92 -34
- data/lib/boxcars/engine/groq.rb +124 -73
- data/lib/boxcars/engine/intelligence_base.rb +52 -17
- data/lib/boxcars/engine/ollama.rb +127 -47
- data/lib/boxcars/engine/openai.rb +186 -103
- data/lib/boxcars/engine/perplexityai.rb +116 -136
- data/lib/boxcars/engine/together.rb +2 -2
- data/lib/boxcars/engine/unified_observability.rb +430 -0
- data/lib/boxcars/engine.rb +4 -3
- data/lib/boxcars/engines.rb +74 -0
- data/lib/boxcars/observability.rb +44 -0
- data/lib/boxcars/observability_backend.rb +17 -0
- data/lib/boxcars/observability_backends/multi_backend.rb +42 -0
- data/lib/boxcars/observability_backends/posthog_backend.rb +89 -0
- data/lib/boxcars/observation.rb +8 -8
- data/lib/boxcars/prompt.rb +16 -4
- data/lib/boxcars/result.rb +7 -12
- data/lib/boxcars/ruby_repl.rb +1 -1
- data/lib/boxcars/train/train_action.rb +1 -1
- data/lib/boxcars/train/xml_train.rb +3 -3
- data/lib/boxcars/train/xml_zero_shot.rb +1 -1
- data/lib/boxcars/train/zero_shot.rb +3 -3
- data/lib/boxcars/train.rb +1 -1
- data/lib/boxcars/vector_search.rb +5 -5
- data/lib/boxcars/vector_store/pgvector/build_from_array.rb +116 -88
- data/lib/boxcars/vector_store/pgvector/build_from_files.rb +106 -80
- data/lib/boxcars/vector_store/pgvector/save_to_database.rb +148 -122
- data/lib/boxcars/vector_store/pgvector/search.rb +157 -131
- data/lib/boxcars/vector_store.rb +4 -4
- data/lib/boxcars/version.rb +1 -1
- data/lib/boxcars.rb +31 -20
- metadata +11 -21
@@ -0,0 +1,89 @@
|
|
1
|
+
# Ensure the base module is available
|
2
|
+
require_relative '../observability_backend'
|
3
|
+
|
4
|
+
module Boxcars
|
5
|
+
# An observability backend for sending events to PostHog.
|
6
|
+
#
|
7
|
+
# This backend requires the `posthog-ruby` gem.
|
8
|
+
# Add `gem 'posthog-ruby'` to your Gemfile to use this backend.
|
9
|
+
#
|
10
|
+
# Example Usage:
|
11
|
+
# require 'boxcars/observability_backends/posthog_backend'
|
12
|
+
# Boxcars::Observability.backend = Boxcars::PosthogBackend.new(
|
13
|
+
# api_key: 'YOUR_POSTHOG_API_KEY',
|
14
|
+
# host: 'https://app.posthog.com' # or your self-hosted instance
|
15
|
+
# )
|
16
|
+
#
|
17
|
+
# # To track user-specific events, ensure :user_id is present in properties
|
18
|
+
# Boxcars::Observability.track(
|
19
|
+
# event: 'my_event',
|
20
|
+
# properties: { user_id: 'user_123', custom_data: 'value' }
|
21
|
+
# )
|
22
|
+
class PosthogBackend
|
23
|
+
include Boxcars::ObservabilityBackend
|
24
|
+
|
25
|
+
# Initializes the PosthogBackend.
|
26
|
+
# Configures the PostHog client with the provided API key and host.
|
27
|
+
#
|
28
|
+
# @param api_key [String] Your PostHog project API key.
|
29
|
+
# @param host [String] The PostHog API host. Defaults to 'https://app.posthog.com'.
|
30
|
+
# @param _personal_api_key [String, nil] Optional: A personal API key for server-side operations if needed.
|
31
|
+
# @param on_error [Proc, nil] Optional: A lambda/proc to call when an error occurs during event capture.
|
32
|
+
# It receives the error code and error body as arguments.
|
33
|
+
# Defaults to a proc that logs the error to stderr.
|
34
|
+
# @raise [LoadError] if the 'posthog-ruby' gem is not available.
|
35
|
+
def initialize(api_key:, host: 'https://app.posthog.com', _personal_api_key: nil, on_error: nil)
|
36
|
+
begin
|
37
|
+
require 'posthog'
|
38
|
+
rescue LoadError
|
39
|
+
raise LoadError, "The 'posthog-ruby' gem is required to use PosthogBackend. Please add it to your Gemfile."
|
40
|
+
end
|
41
|
+
|
42
|
+
@on_error_proc = on_error || proc do |status, body|
|
43
|
+
Boxcars.error("PostHog error: Status #{status}, Body: #{body}", :red)
|
44
|
+
end
|
45
|
+
|
46
|
+
# The posthog-ruby gem uses a simpler API
|
47
|
+
@posthog_client = PostHog::Client.new(
|
48
|
+
api_key:,
|
49
|
+
host:,
|
50
|
+
on_error: @on_error_proc
|
51
|
+
)
|
52
|
+
end
|
53
|
+
|
54
|
+
# Tracks an event with PostHog.
|
55
|
+
#
|
56
|
+
# The `:user_id` property is used as PostHog's `distinct_id`. If not provided,
|
57
|
+
# events might be tracked anonymously or associated with a default/server ID
|
58
|
+
# depending on PostHog's SDK behavior.
|
59
|
+
#
|
60
|
+
# All other properties are passed as event properties to PostHog.
|
61
|
+
#
|
62
|
+
# @param event [String, Symbol] The name of the event to track.
|
63
|
+
# @param properties [Hash] A hash of properties for the event.
|
64
|
+
# It's recommended to include a `:user_id` for user-specific tracking.
|
65
|
+
def track(event:, properties:)
|
66
|
+
# Ensure properties is a hash, duplicate to avoid mutation by PostHog or other backends
|
67
|
+
tracking_properties = properties.is_a?(Hash) ? properties.dup : {}
|
68
|
+
|
69
|
+
distinct_id = tracking_properties.delete(:user_id) || tracking_properties.delete('user_id') || "anonymous_user"
|
70
|
+
|
71
|
+
# The PostHog gem's capture method handles distinct_id and properties.
|
72
|
+
# It's important that distinct_id is a string.
|
73
|
+
@posthog_client.capture(
|
74
|
+
distinct_id: distinct_id.to_s, # Ensure distinct_id is a string
|
75
|
+
event: event.to_s, # Ensure event name is a string
|
76
|
+
properties: tracking_properties
|
77
|
+
)
|
78
|
+
# The posthog-ruby client handles flushing events asynchronously.
|
79
|
+
# If immediate flushing is needed for testing or specific scenarios:
|
80
|
+
# @posthog_client.flush
|
81
|
+
end
|
82
|
+
|
83
|
+
# Flushes any pending events to PostHog immediately.
|
84
|
+
# This is useful for testing or when you need to ensure events are sent before the process exits.
|
85
|
+
def flush
|
86
|
+
@posthog_client.flush if @posthog_client.respond_to?(:flush)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/lib/boxcars/observation.rb
CHANGED
@@ -17,14 +17,14 @@ module Boxcars
|
|
17
17
|
# @return [Hash] The result as a hash
|
18
18
|
def to_h
|
19
19
|
{
|
20
|
-
note
|
21
|
-
status:
|
20
|
+
note:,
|
21
|
+
status:
|
22
22
|
}.merge(added_context).compact
|
23
23
|
end
|
24
24
|
|
25
25
|
# @return [String] The result as a json string
|
26
|
-
def to_json(*
|
27
|
-
JSON.generate(to_h, *
|
26
|
+
def to_json(*)
|
27
|
+
JSON.generate(to_h, *)
|
28
28
|
end
|
29
29
|
|
30
30
|
# @return [String] An explanation of the result
|
@@ -41,16 +41,16 @@ module Boxcars
|
|
41
41
|
# @param note [String] The text to use for the observation
|
42
42
|
# @param added_context [Hash] Any additional context to add to the result
|
43
43
|
# @return [Boxcars::Observation] The observation
|
44
|
-
def self.ok(note, **
|
45
|
-
new(note
|
44
|
+
def self.ok(note, **)
|
45
|
+
new(note:, status: :ok, **)
|
46
46
|
end
|
47
47
|
|
48
48
|
# create a new Observaton from a text string with a status of :error
|
49
49
|
# @param note [String] The text to use for the observation
|
50
50
|
# @param added_context [Hash] Any additional context to add to the result
|
51
51
|
# @return [Boxcars::Observation] The observation
|
52
|
-
def self.err(note, **
|
53
|
-
new(note
|
52
|
+
def self.err(note, **)
|
53
|
+
new(note:, status: :error, **)
|
54
54
|
end
|
55
55
|
end
|
56
56
|
end
|
data/lib/boxcars/prompt.rb
CHANGED
@@ -56,18 +56,30 @@ module Boxcars
|
|
56
56
|
conversation
|
57
57
|
end
|
58
58
|
|
59
|
-
private
|
60
|
-
|
61
59
|
# format the prompt with the input variables
|
62
60
|
# @param inputs [Hash] The inputs to use for the prompt.
|
63
61
|
# @return [String] The formatted prompt.
|
64
62
|
# @raise [Boxcars::KeyError] if the template has extra keys.
|
65
63
|
def format(inputs)
|
66
|
-
|
64
|
+
# Ensure all input keys are symbols for consistent lookup
|
65
|
+
symbolized_inputs = inputs.transform_keys(&:to_sym)
|
66
|
+
|
67
|
+
# Use sprintf for templates like "hi %<name>s"
|
68
|
+
# Ensure that all keys expected by the template are present in symbolized_inputs
|
69
|
+
template_keys = @template.scan(/%<(\w+)>s/).flatten.map(&:to_sym)
|
70
|
+
missing_keys = template_keys - symbolized_inputs.keys
|
71
|
+
raise ::KeyError, "missing keys: #{missing_keys.join(', ')}" if missing_keys.any?
|
72
|
+
|
73
|
+
# Perform the substitution
|
74
|
+
@template % symbolized_inputs
|
67
75
|
rescue ::KeyError => e
|
68
76
|
first_line = e.message.to_s.split("\n").first
|
69
|
-
Boxcars.error "
|
77
|
+
Boxcars.error "Prompt format error: #{first_line}" # Changed message slightly for clarity
|
70
78
|
raise KeyError, "Prompt format error: #{first_line}"
|
79
|
+
rescue ArgumentError => e # Catch sprintf errors e.g. "too many arguments for format string"
|
80
|
+
first_line = e.message.to_s.split("\n").first
|
81
|
+
Boxcars.error "Prompt format error: #{first_line}"
|
82
|
+
raise ArgumentError, "Prompt format error: #{first_line}"
|
71
83
|
end
|
72
84
|
end
|
73
85
|
end
|
data/lib/boxcars/result.rb
CHANGED
@@ -20,17 +20,12 @@ module Boxcars
|
|
20
20
|
|
21
21
|
# @return [Hash] The result as a hash
|
22
22
|
def to_h
|
23
|
-
{
|
24
|
-
status: status,
|
25
|
-
answer: answer,
|
26
|
-
explanation: explanation,
|
27
|
-
suggestions: suggestions
|
28
|
-
}.merge(added_context).compact
|
23
|
+
{ status:, answer:, explanation:, suggestions: }.merge(added_context).compact
|
29
24
|
end
|
30
25
|
|
31
26
|
# @return [String] The result as a json string
|
32
|
-
def to_json(*
|
33
|
-
JSON.generate(to_h, *
|
27
|
+
def to_json(*)
|
28
|
+
JSON.generate(to_h, *)
|
34
29
|
end
|
35
30
|
|
36
31
|
# @return [String] An explanation of the result
|
@@ -47,22 +42,22 @@ module Boxcars
|
|
47
42
|
# @param text [String] The text to use for the result
|
48
43
|
# @param kwargs [Hash] Any additional kwargs to pass to the result
|
49
44
|
# @return [Boxcars::Result] The result
|
50
|
-
def self.from_text(text, **
|
45
|
+
def self.from_text(text, **)
|
51
46
|
answer = text.delete_prefix('"').delete_suffix('"').strip
|
52
47
|
answer = Regexp.last_match(:answer) if answer =~ /^Answer:\s*(?<answer>.*)$/
|
53
48
|
explanation = "Answer: #{answer}"
|
54
|
-
new(status: :ok, answer
|
49
|
+
new(status: :ok, answer:, explanation:, **)
|
55
50
|
end
|
56
51
|
|
57
52
|
# create a new Result from an error string
|
58
53
|
# @param error [String] The error to use for the result
|
59
54
|
# @param kwargs [Hash] Any additional kwargs to pass to the result
|
60
55
|
# @return [Boxcars::Result] The error result
|
61
|
-
def self.from_error(error, **
|
56
|
+
def self.from_error(error, **)
|
62
57
|
answer = error
|
63
58
|
answer = Regexp.last_match(:answer) if answer =~ /^Error:\s*(?<answer>.*)$/
|
64
59
|
explanation = "Error: #{answer}"
|
65
|
-
new(status: :error, answer
|
60
|
+
new(status: :error, answer:, explanation:, **)
|
66
61
|
end
|
67
62
|
end
|
68
63
|
end
|
data/lib/boxcars/ruby_repl.rb
CHANGED
@@ -19,7 +19,7 @@ module Boxcars
|
|
19
19
|
if output =~ /^Error: /
|
20
20
|
Boxcars.debug output, :red
|
21
21
|
Result.from_error(output, code: code)
|
22
|
-
elsif output.
|
22
|
+
elsif output.nil? || output.strip.empty?
|
23
23
|
Result.from_error("The code you gave me did not print a result", code: code)
|
24
24
|
else
|
25
25
|
output = ::Regexp.last_match(1) if output =~ /^\s*Answer:\s*(.*)$/m
|
@@ -22,7 +22,7 @@ module Boxcars
|
|
22
22
|
# @param log [String] The log of the action.
|
23
23
|
# @return [Boxcars::TrainAction] The train action.
|
24
24
|
def self.from_result(result:, boxcar:, log:)
|
25
|
-
new(boxcar
|
25
|
+
new(boxcar:, boxcar_input: result.to_answer, log:)
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
@@ -38,7 +38,7 @@ module Boxcars
|
|
38
38
|
|
39
39
|
# @return Hash The additional variables for this boxcar.
|
40
40
|
def prediction_additional(_inputs)
|
41
|
-
{ boxcars_xml
|
41
|
+
{ boxcars_xml:, next_actions: }.merge super
|
42
42
|
end
|
43
43
|
|
44
44
|
def build_output(text)
|
@@ -87,11 +87,11 @@ module Boxcars
|
|
87
87
|
# the thought should be the frist line here if it doesn't start with "Action:"
|
88
88
|
Boxcars.debug("Thought: #{thought}", :yellow)
|
89
89
|
|
90
|
-
if final_answer.
|
90
|
+
if final_answer && !final_answer.to_s.strip.empty?
|
91
91
|
Result.new(status: :ok, answer: final_answer, explanation: final_answer)
|
92
92
|
else
|
93
93
|
# we have an unexpected output from the engine
|
94
|
-
unless action.
|
94
|
+
unless action && !action.to_s.strip.empty? && action_input && !action_input.to_s.strip.empty?
|
95
95
|
return [:error, "You gave me an improperly formatted answer or didn't use tags."]
|
96
96
|
end
|
97
97
|
|
@@ -19,7 +19,7 @@ module Boxcars
|
|
19
19
|
@engine_prefix = ''
|
20
20
|
@wants_next_actions = kwargs.fetch(:wants_next_actions, false)
|
21
21
|
prompt ||= my_prompt
|
22
|
-
super(engine
|
22
|
+
super(engine:, boxcars:, prompt:, name:, description:, **kwargs)
|
23
23
|
end
|
24
24
|
|
25
25
|
CTEMPLATE = [
|
@@ -16,12 +16,12 @@ module Boxcars
|
|
16
16
|
def initialize(boxcars:, engine: nil, name: 'Zero Shot', description: 'Zero Shot Train', prompt: nil, **kwargs)
|
17
17
|
@wants_next_actions = kwargs.fetch(:wants_next_actions, false)
|
18
18
|
prompt ||= my_prompt
|
19
|
-
super(engine
|
19
|
+
super(engine:, boxcars:, prompt:, name:, description:, **kwargs)
|
20
20
|
end
|
21
21
|
|
22
22
|
# @return Hash The additional variables for this boxcar.
|
23
23
|
def prediction_additional(_inputs)
|
24
|
-
{ boxcar_names
|
24
|
+
{ boxcar_names:, boxcar_descriptions:, next_actions: }.merge super
|
25
25
|
end
|
26
26
|
|
27
27
|
# Extract the boxcar and input from the engine output.
|
@@ -72,7 +72,7 @@ module Boxcars
|
|
72
72
|
# with "Action Input:" should be separated by a newline.
|
73
73
|
if engine_output.include?(FINAL_ANSWER_ACTION)
|
74
74
|
answer = engine_output.split(FINAL_ANSWER_ACTION).last.strip
|
75
|
-
Result.new(status: :ok, answer
|
75
|
+
Result.new(status: :ok, answer:, explanation: engine_output)
|
76
76
|
else
|
77
77
|
# the thought should be the frist line here if it doesn't start with "Action:"
|
78
78
|
thought = engine_output.split(/\n+/).reject(&:empty?).first
|
data/lib/boxcars/train.rb
CHANGED
@@ -42,7 +42,7 @@ module Boxcars
|
|
42
42
|
def call(query:, count: 1)
|
43
43
|
validate_query(query)
|
44
44
|
query_vector = convert_query_to_vector(query)
|
45
|
-
@vector_search_instance.call(query_vector
|
45
|
+
@vector_search_instance.call(query_vector:, count:)
|
46
46
|
end
|
47
47
|
|
48
48
|
private
|
@@ -53,15 +53,15 @@ module Boxcars
|
|
53
53
|
case vector_documents[:type]
|
54
54
|
when :hnswlib
|
55
55
|
Boxcars::VectorStore::Hnswlib::Search.new(
|
56
|
-
vector_documents:
|
56
|
+
vector_documents:
|
57
57
|
)
|
58
58
|
when :in_memory
|
59
59
|
Boxcars::VectorStore::InMemory::Search.new(
|
60
|
-
vector_documents:
|
60
|
+
vector_documents:
|
61
61
|
)
|
62
62
|
when :pgvector
|
63
63
|
Boxcars::VectorStore::Pgvector::Search.new(
|
64
|
-
vector_documents:
|
64
|
+
vector_documents:
|
65
65
|
)
|
66
66
|
else
|
67
67
|
raise_argument_error('Unsupported vector store provided')
|
@@ -69,7 +69,7 @@ module Boxcars
|
|
69
69
|
end
|
70
70
|
|
71
71
|
def default_connection(openai_access_token: nil)
|
72
|
-
Openai.open_ai_client(openai_access_token:
|
72
|
+
Openai.open_ai_client(openai_access_token:)
|
73
73
|
end
|
74
74
|
|
75
75
|
def validate_query(query)
|
@@ -1,106 +1,134 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
module
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
3
|
+
if Gem.loaded_specs.key?('pgvector') && Gem.loaded_specs.key?('pg')
|
4
|
+
module Boxcars
|
5
|
+
module VectorStore
|
6
|
+
# install pgvector: https://github.com/pgvector/pgvector#installation-notes
|
7
|
+
module Pgvector
|
8
|
+
class BuildFromArray
|
9
|
+
include VectorStore
|
10
|
+
|
11
|
+
# initialize the vector store with the following parameters:
|
12
|
+
#
|
13
|
+
# @param params [Hash] A Hash containing the initial configuration.
|
14
|
+
#
|
15
|
+
# @option params [Symbol] :embedding_tool The embedding tool to use. Must be provided.
|
16
|
+
# @option params [Array] :input_array The array of inputs to use for the embedding tool. Must be provided.
|
17
|
+
# each hash item should have content and metadata
|
18
|
+
# [
|
19
|
+
# { content: "hello", metadata: { a: 1 } },
|
20
|
+
# { content: "hi", metadata: { a: 1 } },
|
21
|
+
# { content: "bye", metadata: { a: 1 } },
|
22
|
+
# { content: "what's this", metadata: { a: 1 } }
|
23
|
+
# ]
|
24
|
+
# @option params [String] :database_url The URL of the database where embeddings are stored. Must be provided.
|
25
|
+
# @option params [String] :table_name The name of the database table where embeddings are stored. Must be provided.
|
26
|
+
# @option params [String] :embedding_column_name The name of the database column where embeddings are stored. required.
|
27
|
+
# @option params [String] :content_column_name The name of the db column where content is stored. Must be provided.
|
28
|
+
# @option params [String] :metadata_column_name The name of the database column where metadata is stored. required.
|
29
|
+
def initialize(params)
|
30
|
+
@embedding_tool = params[:embedding_tool] || :openai
|
31
|
+
|
32
|
+
validate_params(embedding_tool, params[:input_array])
|
33
|
+
|
34
|
+
@database_url = params[:database_url]
|
35
|
+
@table_name = params[:table_name]
|
36
|
+
@embedding_column_name = params[:embedding_column_name]
|
37
|
+
@content_column_name = params[:content_column_name]
|
38
|
+
@metadata_column_name = params[:metadata_column_name]
|
39
|
+
|
40
|
+
@input_array = params[:input_array]
|
41
|
+
@pg_vectors = []
|
42
|
+
end
|
42
43
|
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
44
|
+
# @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
|
45
|
+
def call
|
46
|
+
texts = input_array.map { |doc| doc[:content] }
|
47
|
+
vectors = generate_vectors(texts)
|
48
|
+
add_vectors(vectors, input_array)
|
49
|
+
documents = save_vector_store
|
50
|
+
|
51
|
+
{
|
52
|
+
type: :pgvector,
|
53
|
+
vector_store: documents
|
54
|
+
}
|
55
|
+
end
|
55
56
|
|
56
|
-
|
57
|
+
private
|
57
58
|
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
attr_reader :input_array, :embedding_tool, :pg_vectors, :database_url,
|
60
|
+
:table_name, :embedding_column_name, :content_column_name,
|
61
|
+
:metadata_column_name
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
63
|
+
def validate_params(embedding_tool, input_array)
|
64
|
+
raise_argument_error('input_array is nil') unless input_array
|
65
|
+
raise_argument_error('input_array must be an array') unless input_array.is_a?(Array)
|
66
|
+
unless proper_input_array?(input_array)
|
67
|
+
raise_argument_error('items in input_array needs to have content and metadata')
|
68
|
+
end
|
69
|
+
return if %i[openai tensorflow].include?(embedding_tool)
|
67
70
|
|
68
|
-
|
69
|
-
|
71
|
+
raise_argument_error('embedding_tool is invalid') unless %i[openai tensorflow].include?(embedding_tool)
|
72
|
+
end
|
70
73
|
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
+
def proper_input_array?(input_array)
|
75
|
+
return false unless
|
76
|
+
input_array.all? { |hash| hash.key?(:content) && hash.key?(:metadata) }
|
74
77
|
|
75
|
-
|
76
|
-
|
78
|
+
true
|
79
|
+
end
|
77
80
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
+
def add_vectors(vectors, texts)
|
82
|
+
raise_argument_error("vectors are nil") unless vectors
|
83
|
+
raise_argument_error("vectors and texts are not the same size") unless vectors.size == texts.size
|
84
|
+
|
85
|
+
vectors.zip(texts) do |vector, doc|
|
86
|
+
pg_vector = Document.new(
|
87
|
+
content: doc[:content],
|
88
|
+
embedding: vector[:embedding],
|
89
|
+
metadata: doc[:metadata]
|
90
|
+
)
|
91
|
+
@pg_vectors << pg_vector
|
92
|
+
end
|
93
|
+
end
|
81
94
|
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
95
|
+
def save_vector_store
|
96
|
+
result = Boxcars::VectorStore::Pgvector::SaveToDatabase.call(
|
97
|
+
pg_vectors: pg_vectors,
|
98
|
+
database_url: database_url,
|
99
|
+
table_name: table_name,
|
100
|
+
embedding_column_name: embedding_column_name,
|
101
|
+
content_column_name: content_column_name,
|
102
|
+
metadata_column_name: metadata_column_name
|
87
103
|
)
|
88
|
-
|
104
|
+
raise_argument_error('Error saving vector store to database.') unless result
|
105
|
+
|
106
|
+
result
|
89
107
|
end
|
90
108
|
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
else
|
113
|
+
# Define placeholder modules/classes that raise an error if pgvector is not available
|
114
|
+
module Boxcars
|
115
|
+
module VectorStore
|
116
|
+
module Pgvector
|
117
|
+
class PgvectorNotAvailableError < StandardError
|
118
|
+
DEFAULT_MESSAGE = "The 'pgvector' and 'pg' gems are required. Please add them to your Gemfile."
|
119
|
+
def initialize(message = DEFAULT_MESSAGE)
|
120
|
+
super
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
class BuildFromArray
|
125
|
+
def initialize(*_args)
|
126
|
+
raise PgvectorNotAvailableError
|
127
|
+
end
|
91
128
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
database_url: database_url,
|
96
|
-
table_name: table_name,
|
97
|
-
embedding_column_name: embedding_column_name,
|
98
|
-
content_column_name: content_column_name,
|
99
|
-
metadata_column_name: metadata_column_name
|
100
|
-
)
|
101
|
-
raise_argument_error('Error saving vector store to database.') unless result
|
102
|
-
|
103
|
-
result
|
129
|
+
def call(*_args)
|
130
|
+
raise PgvectorNotAvailableError
|
131
|
+
end
|
104
132
|
end
|
105
133
|
end
|
106
134
|
end
|