RubyGems - raif - Versions diffs - 1.1.0 → 1.2.0 - Mend

raif 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

data/lib/raif/utils/html_fragment_processor.rb ADDED Viewed

@@ -0,0 +1,169 @@
+# frozen_string_literal: true
+# Utility class for processing HTML fragments with various cleaning and transformation operations.
+#
+# This class provides methods for sanitizing HTML content, converting markdown links to HTML,
+# processing existing HTML links (adding target="_blank", stripping tracking parameters),
+# and removing tracking parameters from URLs.
+class Raif::Utils::HtmlFragmentProcessor
+  # List of common tracking parameters to remove from URLs
+  TRACKING_PARAMS = %w[
+    utm_source
+    utm_medium
+    utm_campaign
+    utm_term
+    utm_content
+    utm_id
+  ]
+  class << self
+    # Cleans and sanitizes an HTML fragment by removing empty text nodes and dangerous content.
+    #
+    # @param html [String, Nokogiri::HTML::DocumentFragment] The HTML content to clean
+    # @param allowed_tags [Array<String>, nil] Array of allowed HTML tags. Defaults to Rails HTML5 safe list
+    # @param allowed_attributes [Array<String>, nil] Array of allowed HTML attributes. Defaults to Rails HTML5 safe list
+    # @return [String] Cleaned and sanitized HTML string
+    #
+    # @example
+    #   clean_html_fragment("<script>alert('xss')</script><p>Safe content</p>")
+    #   # => "<p>Safe content</p>"
+    #
+    # @example With custom allowed tags
+    #   clean_html_fragment("<p>Para</p><div>Div</div>", allowed_tags: %w[p])
+    #   # => "<p>Para</p>Div"
+    def clean_html_fragment(html, allowed_tags: nil, allowed_attributes: nil)
+      fragment = html.is_a?(Nokogiri::HTML::DocumentFragment) ? html : Nokogiri::HTML.fragment(html)
+      fragment.traverse do |node|
+        if node.text? && node.text.strip.empty?
+          node.remove
+        end
+      end
+      allowed_tags = allowed_tags.presence || Rails::HTML5::SafeListSanitizer.allowed_tags
+      allowed_attributes = allowed_attributes.presence || Rails::HTML5::SafeListSanitizer.allowed_attributes
+      ActionController::Base.helpers.sanitize(fragment.to_html, tags: allowed_tags, attributes: allowed_attributes).strip
+    end
+    # Converts markdown-style links to HTML anchor tags with target="_blank" and rel="noopener".
+    #
+    # Converts [text](url) format to <a href="url" target="_blank" rel="noopener">text</a>.
+    # Also strips tracking parameters from the URLs.
+    #
+    # @param text [String] The text content that may contain markdown links
+    # @return [String] HTML with markdown links converted to anchor tags
+    #
+    # @example
+    #   convert_markdown_links_to_html("Check out [Google](https://google.com) for search.")
+    #   # => 'Check out <a href="https://google.com" target="_blank" rel="noopener">Google</a> for search.'
+    #
+    # @example With tracking parameters
+    #   convert_markdown_links_to_html("[Example](https://example.com?utm_source=test&param=keep)")
+    #   # => '<a href="https://example.com?param=keep" target="_blank" rel="noopener">Example</a>'
+    def convert_markdown_links_to_html(text)
+      # Convert markdown links [text](url) to HTML links <a href="url" target="_blank" rel="noopener">text</a>
+      text.gsub(/\[([^\]]*)\]\(([^)]+)\)/) do |_match|
+        text = ::Regexp.last_match(1)
+        url = ::Regexp.last_match(2)
+        clean_url = strip_tracking_parameters(url)
+        %(<a href="#{CGI.escapeHTML(clean_url)}" target="_blank" rel="noopener">#{CGI.escapeHTML(text)}</a>)
+      end
+    end
+    # Processes existing HTML links by optionally adding target="_blank" and stripping tracking parameters.
+    #
+    # This method provides fine-grained control over link processing with configurable options
+    # for both target="_blank" addition and tracking parameter removal.
+    #
+    # @param html [String, Nokogiri::HTML::DocumentFragment] The HTML content containing links to process
+    # @param add_target_blank [Boolean] Whether to add target="_blank" and rel="noopener" to links (required)
+    # @param strip_tracking_parameters [Boolean] Whether to remove tracking parameters from URLs (required)
+    # @return [String] Processed HTML with modified links
+    #
+    # @example Default behavior (adds target="_blank" and strips tracking params)
+    #   process_links('<a href="https://example.com?utm_source=test">Link</a>', add_target_blank: true, strip_tracking_parameters: true)
+    #   # => '<a href="https://example.com" target="_blank" rel="noopener">Link</a>'
+    #
+    # @example Only strip tracking parameters
+    #   process_links(html, add_target_blank: false, strip_tracking_parameters: true)
+    #   # => '<a href="https://example.com">Link</a>'
+    #
+    # @example Only add target="_blank"
+    #   process_links(html, add_target_blank: true, strip_tracking_parameters: false)
+    #   # => '<a href="https://example.com?utm_source=test" target="_blank" rel="noopener">Link</a>'
+    #
+    # @example No processing
+    #   process_links(html, add_target_blank: false, strip_tracking_parameters: false)
+    #   # => Original HTML unchanged
+    def process_links(html, add_target_blank:, strip_tracking_parameters:)
+      fragment = html.is_a?(Nokogiri::HTML::DocumentFragment) ? html : Nokogiri::HTML.fragment(html)
+      fragment.css("a").each do |link|
+        if add_target_blank
+          link["target"] = "_blank"
+          link["rel"] = "noopener"
+        end
+        if strip_tracking_parameters
+          link["href"] = strip_tracking_parameters(link["href"])
+        end
+      end
+      fragment.to_html
+    end
+    # Removes tracking parameters (UTM parameters) from a URL.
+    #
+    # Preserves all non-tracking query parameters and handles various URL formats including
+    # relative URLs, absolute URLs, and malformed URLs gracefully.
+    #
+    # @param url [String] The URL to clean
+    # @return [String] URL with tracking parameters removed, or original URL if parsing fails
+    #
+    # @example
+    #   strip_tracking_parameters("https://example.com?utm_source=google&page=1")
+    #   # => "https://example.com?page=1"
+    #
+    # @example Removes all tracking parameters
+    #   strip_tracking_parameters("https://example.com?utm_source=test&utm_medium=cpc")
+    #   # => "https://example.com"
+    #
+    # @example Preserves fragments
+    #   strip_tracking_parameters("https://example.com?utm_source=test&page=1#section")
+    #   # => "https://example.com?page=1#section"
+    #
+    # @example Handles relative URLs
+    #   strip_tracking_parameters("/path?utm_source=test&param=keep")
+    #   # => "/path?param=keep"
+    def strip_tracking_parameters(url)
+      return url unless url.include?("?")
+      begin
+        uri = URI.parse(url)
+        return url unless uri.query
+        # Only process URLs that have a valid scheme and host, or are relative URLs
+        unless uri.scheme || url.start_with?("/", "#")
+          return url
+        end
+        # Parse query parameters and filter out tracking ones
+        params = URI.decode_www_form(uri.query)
+        clean_params = params.reject { |param, _| TRACKING_PARAMS.include?(param.downcase) }
+        # Rebuild the URL
+        uri.query = if clean_params.empty?
+          nil
+        else
+          URI.encode_www_form(clean_params)
+        end
+        uri.to_s
+      rescue URI::InvalidURIError
+        # If URL parsing fails, return the original URL
+        url
+      end
+    end
+  end
+end

data/lib/raif/utils.rb CHANGED Viewed

@@ -3,4 +3,5 @@
 module Raif::Utils
   require "raif/utils/readable_content_extractor"
   require "raif/utils/html_to_markdown_converter"
+  require "raif/utils/html_fragment_processor"
 end

data/lib/raif/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Raif
-  VERSION = "1.1.0"
+  VERSION = "1.2.0"
 end

data/lib/raif.rb CHANGED Viewed

@@ -9,8 +9,10 @@ require "raif/utils"
 require "raif/llm_registry"
 require "raif/embedding_model_registry"
 require "raif/json_schema_builder"
+require "raif/migration_checker"
 require "faraday"
+require "event_stream_parser"
 require "json-schema"
 require "loofah"
 require "pagy"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: raif
 version: !ruby/object:Gem::Version
-  version: 1.1.0
+  version: 1.2.0
 platform: ruby
 authors:
 - Ben Roesch
 - Brian Leslie
 bindir: bin
 cert_chain: []
-date: 2025-05-23 00:00:00.000000000 Z
+date: 1980-01-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: aws-sdk-bedrockruntime
@@ -24,6 +24,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: event_stream_parser
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '1.0'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '1.0'
 - !ruby/object:Gem::Dependency
   name: faraday
   requirement: !ruby/object:Gem::Requirement
@@ -175,18 +189,26 @@ files:
 - app/models/raif/concerns/llm_response_parsing.rb
 - app/models/raif/concerns/llm_temperature.rb
 - app/models/raif/concerns/llms/anthropic/message_formatting.rb
-- app/models/raif/concerns/llms/bedrock_claude/message_formatting.rb
+- app/models/raif/concerns/llms/anthropic/tool_formatting.rb
+- app/models/raif/concerns/llms/bedrock/message_formatting.rb
+- app/models/raif/concerns/llms/bedrock/tool_formatting.rb
 - app/models/raif/concerns/llms/message_formatting.rb
-- app/models/raif/concerns/llms/open_ai/message_formatting.rb
+- app/models/raif/concerns/llms/open_ai/json_schema_validation.rb
+- app/models/raif/concerns/llms/open_ai_completions/message_formatting.rb
+- app/models/raif/concerns/llms/open_ai_completions/tool_formatting.rb
+- app/models/raif/concerns/llms/open_ai_responses/message_formatting.rb
+- app/models/raif/concerns/llms/open_ai_responses/tool_formatting.rb
 - app/models/raif/conversation.rb
 - app/models/raif/conversation_entry.rb
 - app/models/raif/embedding_model.rb
-- app/models/raif/embedding_models/bedrock_titan.rb
+- app/models/raif/embedding_models/bedrock.rb
 - app/models/raif/embedding_models/open_ai.rb
 - app/models/raif/llm.rb
 - app/models/raif/llms/anthropic.rb
-- app/models/raif/llms/bedrock_claude.rb
-- app/models/raif/llms/open_ai.rb
+- app/models/raif/llms/bedrock.rb
+- app/models/raif/llms/open_ai_base.rb
+- app/models/raif/llms/open_ai_completions.rb
+- app/models/raif/llms/open_ai_responses.rb
 - app/models/raif/llms/open_router.rb
 - app/models/raif/model_completion.rb
 - app/models/raif/model_file_input.rb
@@ -195,7 +217,15 @@ files:
 - app/models/raif/model_tool_invocation.rb
 - app/models/raif/model_tools/agent_final_answer.rb
 - app/models/raif/model_tools/fetch_url.rb
+- app/models/raif/model_tools/provider_managed/base.rb
+- app/models/raif/model_tools/provider_managed/code_execution.rb
+- app/models/raif/model_tools/provider_managed/image_generation.rb
+- app/models/raif/model_tools/provider_managed/web_search.rb
 - app/models/raif/model_tools/wikipedia_search.rb
+- app/models/raif/streaming_responses/anthropic.rb
+- app/models/raif/streaming_responses/bedrock.rb
+- app/models/raif/streaming_responses/open_ai_completions.rb
+- app/models/raif/streaming_responses/open_ai_responses.rb
 - app/models/raif/task.rb
 - app/models/raif/user_tool_invocation.rb
 - app/views/layouts/raif/admin.html.erb
@@ -218,6 +248,7 @@ files:
 - app/views/raif/admin/tasks/_task.html.erb
 - app/views/raif/admin/tasks/index.html.erb
 - app/views/raif/admin/tasks/show.html.erb
+- app/views/raif/conversation_entries/_citations.html.erb
 - app/views/raif/conversation_entries/_conversation_entry.html.erb
 - app/views/raif/conversation_entries/_form.html.erb
 - app/views/raif/conversation_entries/_form_with_available_tools.html.erb
@@ -242,6 +273,9 @@ files:
 - db/migrate/20250424232946_add_created_at_indexes.rb
 - db/migrate/20250502155330_add_status_indexes_to_raif_tasks.rb
 - db/migrate/20250507155314_add_retry_count_to_raif_model_completions.rb
+- db/migrate/20250527213016_add_response_id_and_response_array_to_model_completions.rb
+- db/migrate/20250603140622_add_citations_to_raif_model_completions.rb
+- db/migrate/20250603202013_add_stream_response_to_raif_model_completions.rb
 - lib/generators/raif/agent/agent_generator.rb
 - lib/generators/raif/agent/templates/agent.rb.tt
 - lib/generators/raif/agent/templates/application_agent.rb.tt
@@ -268,12 +302,15 @@ files:
 - lib/raif/errors/invalid_model_image_input_error.rb
 - lib/raif/errors/invalid_user_tool_type_error.rb
 - lib/raif/errors/open_ai/json_schema_error.rb
+- lib/raif/errors/streaming_error.rb
 - lib/raif/errors/unsupported_feature_error.rb
 - lib/raif/json_schema_builder.rb
 - lib/raif/languages.rb
 - lib/raif/llm_registry.rb
+- lib/raif/migration_checker.rb
 - lib/raif/rspec.rb
 - lib/raif/utils.rb
+- lib/raif/utils/html_fragment_processor.rb
 - lib/raif/utils/html_to_markdown_converter.rb
 - lib/raif/utils/readable_content_extractor.rb
 - lib/raif/version.rb
@@ -300,7 +337,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.2
+rubygems_version: 3.6.7
 specification_version: 4
 summary: Raif (Ruby AI Framework) is a Rails engine that helps you add AI-powered
   features to your Rails apps, such as tasks, conversations, and agents.

data/app/models/raif/llms/open_ai.rb DELETED Viewed

@@ -1,256 +0,0 @@
-# frozen_string_literal: true
-class Raif::Llms::OpenAi < Raif::Llm
-  include Raif::Concerns::Llms::OpenAi::MessageFormatting
-  def perform_model_completion!(model_completion)
-    model_completion.temperature ||= default_temperature
-    parameters = build_request_parameters(model_completion)
-    response = connection.post("chat/completions") do |req|
-      req.body = parameters
-    end
-    response_json = response.body
-    model_completion.update!(
-      response_tool_calls: extract_response_tool_calls(response_json),
-      raw_response: response_json.dig("choices", 0, "message", "content"),
-      completion_tokens: response_json.dig("usage", "completion_tokens"),
-      prompt_tokens: response_json.dig("usage", "prompt_tokens"),
-      total_tokens: response_json.dig("usage", "total_tokens"),
-      response_format_parameter: parameters.dig(:response_format, :type)
-    )
-    model_completion
-  end
-  def connection
-    @connection ||= Faraday.new(url: "https://api.openai.com/v1") do |f|
-      f.headers["Authorization"] = "Bearer #{Raif.config.open_ai_api_key}"
-      f.request :json
-      f.response :json
-      f.response :raise_error
-    end
-  end
-  def validate_json_schema!(schema)
-    return if schema.blank?
-    errors = []
-    # Check if schema is present
-    if schema.blank?
-      errors << "JSON schema must include a 'schema' property"
-    else
-      # Check root object type
-      if schema[:type] != "object" && !schema.key?(:properties)
-        errors << "Root schema must be of type 'object' with 'properties'"
-      end
-      # Check all objects in the schema recursively
-      validate_object_properties(schema, errors)
-      # Check properties count (max 100 total)
-      validate_properties_count(schema, errors)
-      # Check nesting depth (max 5 levels)
-      validate_nesting_depth(schema, errors)
-      # Check for unsupported anyOf at root level
-      if schema[:anyOf].present? && schema[:properties].blank?
-        errors << "Root objects cannot be of type 'anyOf'"
-      end
-    end
-    # Raise error if any validation issues found
-    if errors.any?
-      error_message = "Invalid JSON schema for OpenAI structured outputs: #{errors.join("; ")}\nSchema was: #{schema.inspect}"
-      raise Raif::Errors::OpenAi::JsonSchemaError, error_message
-    else
-      true
-    end
-  end
-private
-  def extract_response_tool_calls(resp)
-    return if resp.dig("choices", 0, "message", "tool_calls").blank?
-    resp.dig("choices", 0, "message", "tool_calls").map do |tool_call|
-      {
-        "name" => tool_call["function"]["name"],
-        "arguments" => JSON.parse(tool_call["function"]["arguments"])
-      }
-    end
-  end
-  def build_request_parameters(model_completion)
-    formatted_system_prompt = model_completion.system_prompt.to_s.strip
-    # If the response format is JSON, we need to include "as json" in the system prompt.
-    # OpenAI requires this and will throw an error if it's not included.
-    if model_completion.response_format_json?
-      # Ensure system prompt ends with a period if not empty
-      if formatted_system_prompt.present? && !formatted_system_prompt.end_with?(".", "?", "!")
-        formatted_system_prompt += "."
-      end
-      formatted_system_prompt += " Return your response as JSON."
-      formatted_system_prompt.strip!
-    end
-    messages = model_completion.messages
-    messages_with_system = if !formatted_system_prompt.empty?
-      [{ "role" => "system", "content" => formatted_system_prompt }] + messages
-    else
-      messages
-    end
-    parameters = {
-      model: api_name,
-      messages: messages_with_system,
-      temperature: model_completion.temperature.to_f
-    }
-    # If the LLM supports native tool use and there are available tools, add them to the parameters
-    if supports_native_tool_use? && model_completion.available_model_tools.any?
-      parameters[:tools] = model_completion.available_model_tools_map.map do |_tool_name, tool|
-        validate_json_schema!(tool.tool_arguments_schema)
-        {
-          type: "function",
-          function: {
-            name: tool.tool_name,
-            description: tool.tool_description,
-            parameters: tool.tool_arguments_schema
-          }
-        }
-      end
-    end
-    # Add response format if needed
-    response_format = determine_response_format(model_completion)
-    parameters[:response_format] = response_format if response_format
-    parameters
-  end
-  def determine_response_format(model_completion)
-    # Only configure response format for JSON outputs
-    return unless model_completion.response_format_json?
-    if model_completion.json_response_schema.present? && supports_structured_outputs?
-      validate_json_schema!(model_completion.json_response_schema)
-      {
-        type: "json_schema",
-        json_schema: {
-          name: "json_response_schema",
-          strict: true,
-          schema: model_completion.json_response_schema
-        }
-      }
-    else
-      # Default JSON mode for OpenAI models that don't support structured outputs or no schema is provided
-      { type: "json_object" }
-    end
-  end
-  def supports_structured_outputs?
-    # Not all OpenAI models support structured outputs:
-    # https://platform.openai.com/docs/guides/structured-outputs?api-mode=chat#supported-models
-    provider_settings.key?(:supports_structured_outputs) ? provider_settings[:supports_structured_outputs] : true
-  end
-  def validate_object_properties(schema, errors)
-    return unless schema.is_a?(Hash)
-    # Check if the current schema is an object and validate additionalProperties and required fields
-    if schema[:type] == "object"
-      if schema[:additionalProperties] != false
-        errors << "All objects must have 'additionalProperties' set to false"
-      end
-      # Check that all properties are required
-      if schema[:properties].is_a?(Hash) && schema[:properties].any?
-        property_keys = schema[:properties].keys
-        required_fields = schema[:required] || []
-        if required_fields.sort != property_keys.map(&:to_s).sort
-          errors << "All object properties must be listed in the 'required' array"
-        end
-      end
-    end
-    # Check if the current schema is an object and validate additionalProperties
-    if schema[:type] == "object"
-      if schema[:additionalProperties] != false
-        errors << "All objects must have 'additionalProperties' set to false"
-      end
-      # Check properties of the object recursively
-      if schema[:properties].is_a?(Hash)
-        schema[:properties].each_value do |property|
-          validate_object_properties(property, errors)
-        end
-      end
-    end
-    # Check array items
-    if schema[:type] == "array" && schema[:items].is_a?(Hash)
-      validate_object_properties(schema[:items], errors)
-    end
-    # Check anyOf
-    if schema[:anyOf].is_a?(Array)
-      schema[:anyOf].each do |option|
-        validate_object_properties(option, errors)
-      end
-    end
-  end
-  def validate_properties_count(schema, errors, count = 0)
-    return count unless schema.is_a?(Hash)
-    if schema[:properties].is_a?(Hash)
-      count += schema[:properties].size
-      if count > 100
-        errors << "Schema exceeds maximum of 100 total object properties"
-        return count
-      end
-      # Check nested properties
-      schema[:properties].each_value do |property|
-        count = validate_properties_count(property, errors, count)
-      end
-    end
-    # Check array items
-    if schema[:type] == "array" && schema[:items].is_a?(Hash)
-      count = validate_properties_count(schema[:items], errors, count)
-    end
-    count
-  end
-  def validate_nesting_depth(schema, errors, depth = 1)
-    return unless schema.is_a?(Hash)
-    if depth > 5
-      errors << "Schema exceeds maximum nesting depth of 5 levels"
-      return
-    end
-    if schema[:properties].is_a?(Hash)
-      schema[:properties].each_value do |property|
-        validate_nesting_depth(property, errors, depth + 1)
-      end
-    end
-    # Check array items
-    if schema[:type] == "array" && schema[:items].is_a?(Hash)
-      validate_nesting_depth(schema[:items], errors, depth + 1)
-    end
-  end
-end