boxcars 0.7.7 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +6 -3
  3. data/.ruby-version +1 -1
  4. data/Gemfile +3 -13
  5. data/Gemfile.lock +29 -25
  6. data/POSTHOG_TEST_README.md +118 -0
  7. data/README.md +305 -0
  8. data/boxcars.gemspec +1 -2
  9. data/lib/boxcars/boxcar/active_record.rb +9 -10
  10. data/lib/boxcars/boxcar/calculator.rb +2 -2
  11. data/lib/boxcars/boxcar/engine_boxcar.rb +4 -4
  12. data/lib/boxcars/boxcar/google_search.rb +2 -2
  13. data/lib/boxcars/boxcar/json_engine_boxcar.rb +1 -1
  14. data/lib/boxcars/boxcar/ruby_calculator.rb +1 -1
  15. data/lib/boxcars/boxcar/sql_base.rb +4 -4
  16. data/lib/boxcars/boxcar/swagger.rb +3 -3
  17. data/lib/boxcars/boxcar/vector_answer.rb +3 -3
  18. data/lib/boxcars/boxcar/xml_engine_boxcar.rb +1 -1
  19. data/lib/boxcars/boxcar.rb +6 -6
  20. data/lib/boxcars/conversation_prompt.rb +3 -3
  21. data/lib/boxcars/engine/anthropic.rb +121 -23
  22. data/lib/boxcars/engine/cerebras.rb +2 -2
  23. data/lib/boxcars/engine/cohere.rb +135 -9
  24. data/lib/boxcars/engine/gemini_ai.rb +151 -76
  25. data/lib/boxcars/engine/google.rb +2 -2
  26. data/lib/boxcars/engine/gpt4all_eng.rb +92 -34
  27. data/lib/boxcars/engine/groq.rb +124 -73
  28. data/lib/boxcars/engine/intelligence_base.rb +52 -17
  29. data/lib/boxcars/engine/ollama.rb +127 -47
  30. data/lib/boxcars/engine/openai.rb +186 -103
  31. data/lib/boxcars/engine/perplexityai.rb +116 -136
  32. data/lib/boxcars/engine/together.rb +2 -2
  33. data/lib/boxcars/engine/unified_observability.rb +430 -0
  34. data/lib/boxcars/engine.rb +4 -3
  35. data/lib/boxcars/engines.rb +74 -0
  36. data/lib/boxcars/observability.rb +44 -0
  37. data/lib/boxcars/observability_backend.rb +17 -0
  38. data/lib/boxcars/observability_backends/multi_backend.rb +42 -0
  39. data/lib/boxcars/observability_backends/posthog_backend.rb +89 -0
  40. data/lib/boxcars/observation.rb +8 -8
  41. data/lib/boxcars/prompt.rb +16 -4
  42. data/lib/boxcars/result.rb +7 -12
  43. data/lib/boxcars/ruby_repl.rb +1 -1
  44. data/lib/boxcars/train/train_action.rb +1 -1
  45. data/lib/boxcars/train/xml_train.rb +3 -3
  46. data/lib/boxcars/train/xml_zero_shot.rb +1 -1
  47. data/lib/boxcars/train/zero_shot.rb +3 -3
  48. data/lib/boxcars/train.rb +1 -1
  49. data/lib/boxcars/vector_search.rb +5 -5
  50. data/lib/boxcars/vector_store/pgvector/build_from_array.rb +116 -88
  51. data/lib/boxcars/vector_store/pgvector/build_from_files.rb +106 -80
  52. data/lib/boxcars/vector_store/pgvector/save_to_database.rb +148 -122
  53. data/lib/boxcars/vector_store/pgvector/search.rb +157 -131
  54. data/lib/boxcars/vector_store.rb +4 -4
  55. data/lib/boxcars/version.rb +1 -1
  56. data/lib/boxcars.rb +31 -20
  57. metadata +11 -21
@@ -0,0 +1,89 @@
1
+ # Ensure the base module is available
2
+ require_relative '../observability_backend'
3
+
4
+ module Boxcars
5
+ # An observability backend for sending events to PostHog.
6
+ #
7
+ # This backend requires the `posthog-ruby` gem.
8
+ # Add `gem 'posthog-ruby'` to your Gemfile to use this backend.
9
+ #
10
+ # Example Usage:
11
+ # require 'boxcars/observability_backends/posthog_backend'
12
+ # Boxcars::Observability.backend = Boxcars::PosthogBackend.new(
13
+ # api_key: 'YOUR_POSTHOG_API_KEY',
14
+ # host: 'https://app.posthog.com' # or your self-hosted instance
15
+ # )
16
+ #
17
+ # # To track user-specific events, ensure :user_id is present in properties
18
+ # Boxcars::Observability.track(
19
+ # event: 'my_event',
20
+ # properties: { user_id: 'user_123', custom_data: 'value' }
21
+ # )
22
+ class PosthogBackend
23
+ include Boxcars::ObservabilityBackend
24
+
25
+ # Initializes the PosthogBackend.
26
+ # Configures the PostHog client with the provided API key and host.
27
+ #
28
+ # @param api_key [String] Your PostHog project API key.
29
+ # @param host [String] The PostHog API host. Defaults to 'https://app.posthog.com'.
30
+ # @param _personal_api_key [String, nil] Optional: A personal API key for server-side operations if needed.
31
+ # @param on_error [Proc, nil] Optional: A lambda/proc to call when an error occurs during event capture.
32
+ # It receives the error code and error body as arguments.
33
+ # Defaults to a proc that logs the error to stderr.
34
+ # @raise [LoadError] if the 'posthog-ruby' gem is not available.
35
+ def initialize(api_key:, host: 'https://app.posthog.com', _personal_api_key: nil, on_error: nil)
36
+ begin
37
+ require 'posthog'
38
+ rescue LoadError
39
+ raise LoadError, "The 'posthog-ruby' gem is required to use PosthogBackend. Please add it to your Gemfile."
40
+ end
41
+
42
+ @on_error_proc = on_error || proc do |status, body|
43
+ Boxcars.error("PostHog error: Status #{status}, Body: #{body}", :red)
44
+ end
45
+
46
+ # The posthog-ruby gem uses a simpler API
47
+ @posthog_client = PostHog::Client.new(
48
+ api_key:,
49
+ host:,
50
+ on_error: @on_error_proc
51
+ )
52
+ end
53
+
54
+ # Tracks an event with PostHog.
55
+ #
56
+ # The `:user_id` property is used as PostHog's `distinct_id`. If not provided,
57
+ # events might be tracked anonymously or associated with a default/server ID
58
+ # depending on PostHog's SDK behavior.
59
+ #
60
+ # All other properties are passed as event properties to PostHog.
61
+ #
62
+ # @param event [String, Symbol] The name of the event to track.
63
+ # @param properties [Hash] A hash of properties for the event.
64
+ # It's recommended to include a `:user_id` for user-specific tracking.
65
+ def track(event:, properties:)
66
+ # Ensure properties is a hash, duplicate to avoid mutation by PostHog or other backends
67
+ tracking_properties = properties.is_a?(Hash) ? properties.dup : {}
68
+
69
+ distinct_id = tracking_properties.delete(:user_id) || tracking_properties.delete('user_id') || "anonymous_user"
70
+
71
+ # The PostHog gem's capture method handles distinct_id and properties.
72
+ # It's important that distinct_id is a string.
73
+ @posthog_client.capture(
74
+ distinct_id: distinct_id.to_s, # Ensure distinct_id is a string
75
+ event: event.to_s, # Ensure event name is a string
76
+ properties: tracking_properties
77
+ )
78
+ # The posthog-ruby client handles flushing events asynchronously.
79
+ # If immediate flushing is needed for testing or specific scenarios:
80
+ # @posthog_client.flush
81
+ end
82
+
83
+ # Flushes any pending events to PostHog immediately.
84
+ # This is useful for testing or when you need to ensure events are sent before the process exits.
85
+ def flush
86
+ @posthog_client.flush if @posthog_client.respond_to?(:flush)
87
+ end
88
+ end
89
+ end
@@ -17,14 +17,14 @@ module Boxcars
17
17
  # @return [Hash] The result as a hash
18
18
  def to_h
19
19
  {
20
- note: note,
21
- status: status
20
+ note:,
21
+ status:
22
22
  }.merge(added_context).compact
23
23
  end
24
24
 
25
25
  # @return [String] The result as a json string
26
- def to_json(*args)
27
- JSON.generate(to_h, *args)
26
+ def to_json(*)
27
+ JSON.generate(to_h, *)
28
28
  end
29
29
 
30
30
  # @return [String] An explanation of the result
@@ -41,16 +41,16 @@ module Boxcars
41
41
  # @param note [String] The text to use for the observation
42
42
  # @param added_context [Hash] Any additional context to add to the result
43
43
  # @return [Boxcars::Observation] The observation
44
- def self.ok(note, **kwargs)
45
- new(note: note, status: :ok, **kwargs)
44
+ def self.ok(note, **)
45
+ new(note:, status: :ok, **)
46
46
  end
47
47
 
48
48
  # create a new Observation from a text string with a status of :error
49
49
  # @param note [String] The text to use for the observation
50
50
  # @param added_context [Hash] Any additional context to add to the result
51
51
  # @return [Boxcars::Observation] The observation
52
- def self.err(note, **kwargs)
53
- new(note: note, status: :error, **kwargs)
52
+ def self.err(note, **)
53
+ new(note:, status: :error, **)
54
54
  end
55
55
  end
56
56
  end
@@ -56,18 +56,30 @@ module Boxcars
56
56
  conversation
57
57
  end
58
58
 
59
- private
60
-
61
59
  # format the prompt with the input variables
62
60
  # @param inputs [Hash] The inputs to use for the prompt.
63
61
  # @return [String] The formatted prompt.
64
62
  # @raise [Boxcars::KeyError] if the template has extra keys.
65
63
  def format(inputs)
66
- @template % inputs
64
+ # Ensure all input keys are symbols for consistent lookup
65
+ symbolized_inputs = inputs.transform_keys(&:to_sym)
66
+
67
+ # Use sprintf for templates like "hi %<name>s"
68
+ # Ensure that all keys expected by the template are present in symbolized_inputs
69
+ template_keys = @template.scan(/%<(\w+)>s/).flatten.map(&:to_sym)
70
+ missing_keys = template_keys - symbolized_inputs.keys
71
+ raise ::KeyError, "missing keys: #{missing_keys.join(', ')}" if missing_keys.any?
72
+
73
+ # Perform the substitution
74
+ @template % symbolized_inputs
67
75
  rescue ::KeyError => e
68
76
  first_line = e.message.to_s.split("\n").first
69
- Boxcars.error "Missing prompt input key: #{first_line}"
77
+ Boxcars.error "Prompt format error: #{first_line}" # Changed message slightly for clarity
70
78
  raise KeyError, "Prompt format error: #{first_line}"
79
+ rescue ArgumentError => e # Catch sprintf errors e.g. "too many arguments for format string"
80
+ first_line = e.message.to_s.split("\n").first
81
+ Boxcars.error "Prompt format error: #{first_line}"
82
+ raise ArgumentError, "Prompt format error: #{first_line}"
71
83
  end
72
84
  end
73
85
  end
@@ -20,17 +20,12 @@ module Boxcars
20
20
 
21
21
  # @return [Hash] The result as a hash
22
22
  def to_h
23
- {
24
- status: status,
25
- answer: answer,
26
- explanation: explanation,
27
- suggestions: suggestions
28
- }.merge(added_context).compact
23
+ { status:, answer:, explanation:, suggestions: }.merge(added_context).compact
29
24
  end
30
25
 
31
26
  # @return [String] The result as a json string
32
- def to_json(*args)
33
- JSON.generate(to_h, *args)
27
+ def to_json(*)
28
+ JSON.generate(to_h, *)
34
29
  end
35
30
 
36
31
  # @return [String] An explanation of the result
@@ -47,22 +42,22 @@ module Boxcars
47
42
  # @param text [String] The text to use for the result
48
43
  # @param kwargs [Hash] Any additional kwargs to pass to the result
49
44
  # @return [Boxcars::Result] The result
50
- def self.from_text(text, **kwargs)
45
+ def self.from_text(text, **)
51
46
  answer = text.delete_prefix('"').delete_suffix('"').strip
52
47
  answer = Regexp.last_match(:answer) if answer =~ /^Answer:\s*(?<answer>.*)$/
53
48
  explanation = "Answer: #{answer}"
54
- new(status: :ok, answer: answer, explanation: explanation, **kwargs)
49
+ new(status: :ok, answer:, explanation:, **)
55
50
  end
56
51
 
57
52
  # create a new Result from an error string
58
53
  # @param error [String] The error to use for the result
59
54
  # @param kwargs [Hash] Any additional kwargs to pass to the result
60
55
  # @return [Boxcars::Result] The error result
61
- def self.from_error(error, **kwargs)
56
+ def self.from_error(error, **)
62
57
  answer = error
63
58
  answer = Regexp.last_match(:answer) if answer =~ /^Error:\s*(?<answer>.*)$/
64
59
  explanation = "Error: #{answer}"
65
- new(status: :error, answer: answer, explanation: explanation, **kwargs)
60
+ new(status: :error, answer:, explanation:, **)
66
61
  end
67
62
  end
68
63
  end
@@ -19,7 +19,7 @@ module Boxcars
19
19
  if output =~ /^Error: /
20
20
  Boxcars.debug output, :red
21
21
  Result.from_error(output, code: code)
22
- elsif output.blank?
22
+ elsif output.nil? || output.strip.empty?
23
23
  Result.from_error("The code you gave me did not print a result", code: code)
24
24
  else
25
25
  output = ::Regexp.last_match(1) if output =~ /^\s*Answer:\s*(.*)$/m
@@ -22,7 +22,7 @@ module Boxcars
22
22
  # @param log [String] The log of the action.
23
23
  # @return [Boxcars::TrainAction] The train action.
24
24
  def self.from_result(result:, boxcar:, log:)
25
- new(boxcar: boxcar, boxcar_input: result.to_answer, log: log)
25
+ new(boxcar:, boxcar_input: result.to_answer, log:)
26
26
  end
27
27
  end
28
28
  end
@@ -38,7 +38,7 @@ module Boxcars
38
38
 
39
39
  # @return Hash The additional variables for this boxcar.
40
40
  def prediction_additional(_inputs)
41
- { boxcars_xml: boxcars_xml, next_actions: next_actions }.merge super
41
+ { boxcars_xml:, next_actions: }.merge super
42
42
  end
43
43
 
44
44
  def build_output(text)
@@ -87,11 +87,11 @@ module Boxcars
87
87
  # the thought should be the first line here if it doesn't start with "Action:"
88
88
  Boxcars.debug("Thought: #{thought}", :yellow)
89
89
 
90
- if final_answer.present?
90
+ if final_answer && !final_answer.to_s.strip.empty?
91
91
  Result.new(status: :ok, answer: final_answer, explanation: final_answer)
92
92
  else
93
93
  # we have an unexpected output from the engine
94
- unless action.present? && action_input.present?
94
+ unless action && !action.to_s.strip.empty? && action_input && !action_input.to_s.strip.empty?
95
95
  return [:error, "You gave me an improperly formatted answer or didn't use tags."]
96
96
  end
97
97
 
@@ -19,7 +19,7 @@ module Boxcars
19
19
  @engine_prefix = ''
20
20
  @wants_next_actions = kwargs.fetch(:wants_next_actions, false)
21
21
  prompt ||= my_prompt
22
- super(engine: engine, boxcars: boxcars, prompt: prompt, name: name, description: description, **kwargs)
22
+ super(engine:, boxcars:, prompt:, name:, description:, **kwargs)
23
23
  end
24
24
 
25
25
  CTEMPLATE = [
@@ -16,12 +16,12 @@ module Boxcars
16
16
  def initialize(boxcars:, engine: nil, name: 'Zero Shot', description: 'Zero Shot Train', prompt: nil, **kwargs)
17
17
  @wants_next_actions = kwargs.fetch(:wants_next_actions, false)
18
18
  prompt ||= my_prompt
19
- super(engine: engine, boxcars: boxcars, prompt: prompt, name: name, description: description, **kwargs)
19
+ super(engine:, boxcars:, prompt:, name:, description:, **kwargs)
20
20
  end
21
21
 
22
22
  # @return Hash The additional variables for this boxcar.
23
23
  def prediction_additional(_inputs)
24
- { boxcar_names: boxcar_names, boxcar_descriptions: boxcar_descriptions, next_actions: next_actions }.merge super
24
+ { boxcar_names:, boxcar_descriptions:, next_actions: }.merge super
25
25
  end
26
26
 
27
27
  # Extract the boxcar and input from the engine output.
@@ -72,7 +72,7 @@ module Boxcars
72
72
  # with "Action Input:" should be separated by a newline.
73
73
  if engine_output.include?(FINAL_ANSWER_ACTION)
74
74
  answer = engine_output.split(FINAL_ANSWER_ACTION).last.strip
75
- Result.new(status: :ok, answer: answer, explanation: engine_output)
75
+ Result.new(status: :ok, answer:, explanation: engine_output)
76
76
  else
77
77
  # the thought should be the first line here if it doesn't start with "Action:"
78
78
  thought = engine_output.split(/\n+/).reject(&:empty?).first
data/lib/boxcars/train.rb CHANGED
@@ -24,7 +24,7 @@ module Boxcars
24
24
  init_prefixes
25
25
  kwargs[:stop] = ["\n#{observation_prefix}"] unless kwargs.key?(:stop)
26
26
 
27
- super(prompt: prompt, engine: engine, **kwargs)
27
+ super(prompt:, engine:, **kwargs)
28
28
  end
29
29
 
30
30
  def init_prefixes
@@ -42,7 +42,7 @@ module Boxcars
42
42
  def call(query:, count: 1)
43
43
  validate_query(query)
44
44
  query_vector = convert_query_to_vector(query)
45
- @vector_search_instance.call(query_vector: query_vector, count: count)
45
+ @vector_search_instance.call(query_vector:, count:)
46
46
  end
47
47
 
48
48
  private
@@ -53,15 +53,15 @@ module Boxcars
53
53
  case vector_documents[:type]
54
54
  when :hnswlib
55
55
  Boxcars::VectorStore::Hnswlib::Search.new(
56
- vector_documents: vector_documents
56
+ vector_documents:
57
57
  )
58
58
  when :in_memory
59
59
  Boxcars::VectorStore::InMemory::Search.new(
60
- vector_documents: vector_documents
60
+ vector_documents:
61
61
  )
62
62
  when :pgvector
63
63
  Boxcars::VectorStore::Pgvector::Search.new(
64
- vector_documents: vector_documents
64
+ vector_documents:
65
65
  )
66
66
  else
67
67
  raise_argument_error('Unsupported vector store provided')
@@ -69,7 +69,7 @@ module Boxcars
69
69
  end
70
70
 
71
71
  def default_connection(openai_access_token: nil)
72
- Openai.open_ai_client(openai_access_token: openai_access_token)
72
+ Openai.open_ai_client(openai_access_token:)
73
73
  end
74
74
 
75
75
  def validate_query(query)
@@ -1,106 +1,134 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- module Boxcars
4
- module VectorStore
5
- # install pgvector: https://github.com/pgvector/pgvector#installation-notes
6
- module Pgvector
7
- class BuildFromArray
8
- include VectorStore
9
-
10
- # initialize the vector store with the following parameters:
11
- #
12
- # @param params [Hash] A Hash containing the initial configuration.
13
- #
14
- # @option params [Symbol] :embedding_tool The embedding tool to use. Must be provided.
15
- # @option params [Array] :input_array The array of inputs to use for the embedding tool. Must be provided.
16
- # each hash item should have content and metadata
17
- # [
18
- # { content: "hello", metadata: { a: 1 } },
19
- # { content: "hi", metadata: { a: 1 } },
20
- # { content: "bye", metadata: { a: 1 } },
21
- # { content: "what's this", metadata: { a: 1 } }
22
- # ]
23
- # @option params [String] :database_url The URL of the database where embeddings are stored. Must be provided.
24
- # @option params [String] :table_name The name of the database table where embeddings are stored. Must be provided.
25
- # @option params [String] :embedding_column_name The name of the database column where embeddings are stored. required.
26
- # @option params [String] :content_column_name The name of the database column where content is stored. Must be provided.
27
- # @option params [String] :metadata_column_name The name of the database column where metadata is stored. required.
28
- def initialize(params)
29
- @embedding_tool = params[:embedding_tool] || :openai
30
-
31
- validate_params(embedding_tool, params[:input_array])
32
-
33
- @database_url = params[:database_url]
34
- @table_name = params[:table_name]
35
- @embedding_column_name = params[:embedding_column_name]
36
- @content_column_name = params[:content_column_name]
37
- @metadata_column_name = params[:metadata_column_name]
38
-
39
- @input_array = params[:input_array]
40
- @pg_vectors = []
41
- end
3
+ if Gem.loaded_specs.key?('pgvector') && Gem.loaded_specs.key?('pg')
4
+ module Boxcars
5
+ module VectorStore
6
+ # install pgvector: https://github.com/pgvector/pgvector#installation-notes
7
+ module Pgvector
8
+ class BuildFromArray
9
+ include VectorStore
10
+
11
+ # initialize the vector store with the following parameters:
12
+ #
13
+ # @param params [Hash] A Hash containing the initial configuration.
14
+ #
15
+ # @option params [Symbol] :embedding_tool The embedding tool to use. Must be provided.
16
+ # @option params [Array] :input_array The array of inputs to use for the embedding tool. Must be provided.
17
+ # each hash item should have content and metadata
18
+ # [
19
+ # { content: "hello", metadata: { a: 1 } },
20
+ # { content: "hi", metadata: { a: 1 } },
21
+ # { content: "bye", metadata: { a: 1 } },
22
+ # { content: "what's this", metadata: { a: 1 } }
23
+ # ]
24
+ # @option params [String] :database_url The URL of the database where embeddings are stored. Must be provided.
25
+ # @option params [String] :table_name The name of the database table where embeddings are stored. Must be provided.
26
+ # @option params [String] :embedding_column_name The name of the database column where embeddings are stored. required.
27
+ # @option params [String] :content_column_name The name of the db column where content is stored. Must be provided.
28
+ # @option params [String] :metadata_column_name The name of the database column where metadata is stored. required.
29
+ def initialize(params)
30
+ @embedding_tool = params[:embedding_tool] || :openai
31
+
32
+ validate_params(embedding_tool, params[:input_array])
33
+
34
+ @database_url = params[:database_url]
35
+ @table_name = params[:table_name]
36
+ @embedding_column_name = params[:embedding_column_name]
37
+ @content_column_name = params[:content_column_name]
38
+ @metadata_column_name = params[:metadata_column_name]
39
+
40
+ @input_array = params[:input_array]
41
+ @pg_vectors = []
42
+ end
42
43
 
43
- # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
44
- def call
45
- texts = input_array.map { |doc| doc[:content] }
46
- vectors = generate_vectors(texts)
47
- add_vectors(vectors, input_array)
48
- documents = save_vector_store
49
-
50
- {
51
- type: :pgvector,
52
- vector_store: documents
53
- }
54
- end
44
+ # @return [Hash] vector_store: array of hashes with :content, :metadata, and :embedding keys
45
+ def call
46
+ texts = input_array.map { |doc| doc[:content] }
47
+ vectors = generate_vectors(texts)
48
+ add_vectors(vectors, input_array)
49
+ documents = save_vector_store
50
+
51
+ {
52
+ type: :pgvector,
53
+ vector_store: documents
54
+ }
55
+ end
55
56
 
56
- private
57
+ private
57
58
 
58
- attr_reader :input_array, :embedding_tool, :pg_vectors, :database_url,
59
- :table_name, :embedding_column_name, :content_column_name,
60
- :metadata_column_name
59
+ attr_reader :input_array, :embedding_tool, :pg_vectors, :database_url,
60
+ :table_name, :embedding_column_name, :content_column_name,
61
+ :metadata_column_name
61
62
 
62
- def validate_params(embedding_tool, input_array)
63
- raise_argument_error('input_array is nil') unless input_array
64
- raise_argument_error('input_array must be an array') unless input_array.is_a?(Array)
65
- raise_argument_error('items in input_array needs to have content and metadata') unless proper_input_array?(input_array)
66
- return if %i[openai tensorflow].include?(embedding_tool)
63
+ def validate_params(embedding_tool, input_array)
64
+ raise_argument_error('input_array is nil') unless input_array
65
+ raise_argument_error('input_array must be an array') unless input_array.is_a?(Array)
66
+ unless proper_input_array?(input_array)
67
+ raise_argument_error('items in input_array needs to have content and metadata')
68
+ end
69
+ return if %i[openai tensorflow].include?(embedding_tool)
67
70
 
68
- raise_argument_error('embedding_tool is invalid') unless %i[openai tensorflow].include?(embedding_tool)
69
- end
71
+ raise_argument_error('embedding_tool is invalid') unless %i[openai tensorflow].include?(embedding_tool)
72
+ end
70
73
 
71
- def proper_input_array?(input_array)
72
- return false unless
73
- input_array.all? { |hash| hash.key?(:content) && hash.key?(:metadata) }
74
+ def proper_input_array?(input_array)
75
+ return false unless
76
+ input_array.all? { |hash| hash.key?(:content) && hash.key?(:metadata) }
74
77
 
75
- true
76
- end
78
+ true
79
+ end
77
80
 
78
- def add_vectors(vectors, texts)
79
- raise_argument_error("vectors are nil") unless vectors
80
- raise_argument_error("vectors and texts are not the same size") unless vectors.size == texts.size
81
+ def add_vectors(vectors, texts)
82
+ raise_argument_error("vectors are nil") unless vectors
83
+ raise_argument_error("vectors and texts are not the same size") unless vectors.size == texts.size
84
+
85
+ vectors.zip(texts) do |vector, doc|
86
+ pg_vector = Document.new(
87
+ content: doc[:content],
88
+ embedding: vector[:embedding],
89
+ metadata: doc[:metadata]
90
+ )
91
+ @pg_vectors << pg_vector
92
+ end
93
+ end
81
94
 
82
- vectors.zip(texts) do |vector, doc|
83
- pg_vector = Document.new(
84
- content: doc[:content],
85
- embedding: vector[:embedding],
86
- metadata: doc[:metadata]
95
+ def save_vector_store
96
+ result = Boxcars::VectorStore::Pgvector::SaveToDatabase.call(
97
+ pg_vectors: pg_vectors,
98
+ database_url: database_url,
99
+ table_name: table_name,
100
+ embedding_column_name: embedding_column_name,
101
+ content_column_name: content_column_name,
102
+ metadata_column_name: metadata_column_name
87
103
  )
88
- @pg_vectors << pg_vector
104
+ raise_argument_error('Error saving vector store to database.') unless result
105
+
106
+ result
89
107
  end
90
108
  end
109
+ end
110
+ end
111
+ end
112
+ else
113
+ # Define placeholder modules/classes that raise an error if pgvector is not available
114
+ module Boxcars
115
+ module VectorStore
116
+ module Pgvector
117
+ class PgvectorNotAvailableError < StandardError
118
+ DEFAULT_MESSAGE = "The 'pgvector' and 'pg' gems are required. Please add them to your Gemfile."
119
+ def initialize(message = DEFAULT_MESSAGE)
120
+ super
121
+ end
122
+ end
123
+
124
+ class BuildFromArray
125
+ def initialize(*_args)
126
+ raise PgvectorNotAvailableError
127
+ end
91
128
 
92
- def save_vector_store
93
- result = Boxcars::VectorStore::Pgvector::SaveToDatabase.call(
94
- pg_vectors: pg_vectors,
95
- database_url: database_url,
96
- table_name: table_name,
97
- embedding_column_name: embedding_column_name,
98
- content_column_name: content_column_name,
99
- metadata_column_name: metadata_column_name
100
- )
101
- raise_argument_error('Error saving vector store to database.') unless result
102
-
103
- result
129
+ def call(*_args)
130
+ raise PgvectorNotAvailableError
131
+ end
104
132
  end
105
133
  end
106
134
  end