exaonruby 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +614 -0
- data/exaonruby.gemspec +37 -0
- data/exe/exa +7 -0
- data/lib/exa/cli.rb +458 -0
- data/lib/exa/client.rb +210 -0
- data/lib/exa/configuration.rb +81 -0
- data/lib/exa/endpoints/answer.rb +109 -0
- data/lib/exa/endpoints/contents.rb +141 -0
- data/lib/exa/endpoints/events.rb +71 -0
- data/lib/exa/endpoints/find_similar.rb +154 -0
- data/lib/exa/endpoints/imports.rb +145 -0
- data/lib/exa/endpoints/monitors.rb +193 -0
- data/lib/exa/endpoints/research.rb +158 -0
- data/lib/exa/endpoints/search.rb +195 -0
- data/lib/exa/endpoints/webhooks.rb +161 -0
- data/lib/exa/endpoints/webset_enrichments.rb +162 -0
- data/lib/exa/endpoints/webset_items.rb +90 -0
- data/lib/exa/endpoints/webset_searches.rb +137 -0
- data/lib/exa/endpoints/websets.rb +214 -0
- data/lib/exa/errors.rb +180 -0
- data/lib/exa/resources/answer_response.rb +101 -0
- data/lib/exa/resources/base.rb +56 -0
- data/lib/exa/resources/contents_response.rb +123 -0
- data/lib/exa/resources/event.rb +84 -0
- data/lib/exa/resources/import.rb +137 -0
- data/lib/exa/resources/monitor.rb +205 -0
- data/lib/exa/resources/paginated_response.rb +87 -0
- data/lib/exa/resources/research_task.rb +165 -0
- data/lib/exa/resources/search_response.rb +111 -0
- data/lib/exa/resources/search_result.rb +95 -0
- data/lib/exa/resources/webhook.rb +152 -0
- data/lib/exa/resources/webset.rb +491 -0
- data/lib/exa/resources/webset_item.rb +256 -0
- data/lib/exa/utils/parameter_converter.rb +159 -0
- data/lib/exa/utils/webhook_handler.rb +239 -0
- data/lib/exa/version.rb +7 -0
- data/lib/exa.rb +130 -0
- metadata +146 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# typed: strict
|
|
4
|
+
|
|
5
|
+
module Exa
  # Holds the settings read by the Exa client: credentials, base URLs,
  # request timeout and retry policy. Every attribute is writable.
  class Configuration
    DEFAULT_BASE_URL = "https://api.exa.ai"
    DEFAULT_WEBSETS_BASE_URL = "https://api.exa.ai/websets/v0"
    DEFAULT_TIMEOUT = 60
    DEFAULT_MAX_RETRIES = 3
    DEFAULT_RETRY_DELAY = 0.5
    DEFAULT_MAX_RETRY_DELAY = 30.0
    DEFAULT_RETRY_STATUSES = [429, 500, 502, 503, 504].freeze
    DEFAULT_RETRY_EXCEPTIONS = [
      Faraday::TimeoutError,
      Faraday::ConnectionFailed
    ].freeze

    # @return [String, nil] API key for authentication
    attr_accessor :api_key

    # @return [String] Base URL for the Exa API
    attr_accessor :base_url

    # @return [String] Base URL for the Websets API
    attr_accessor :websets_base_url

    # @return [Integer] Request timeout in seconds
    attr_accessor :timeout

    # @return [Integer] Number of retries for transient failures
    attr_accessor :max_retries

    # @return [Float] Initial retry delay in seconds
    attr_accessor :retry_delay

    # @return [Float] Maximum retry delay in seconds
    attr_accessor :max_retry_delay

    # @return [Array<Integer>] HTTP status codes to retry on
    attr_accessor :retry_statuses

    # @return [Array<Class>] Exception classes to retry on
    attr_accessor :retry_exceptions

    # Builds a configuration populated with the DEFAULT_* values. The API key
    # is read from the EXA_API_KEY environment variable when it is set.
    def initialize
      @api_key = ENV.fetch("EXA_API_KEY", nil)
      @base_url = DEFAULT_BASE_URL
      @websets_base_url = DEFAULT_WEBSETS_BASE_URL
      @timeout = DEFAULT_TIMEOUT
      @max_retries = DEFAULT_MAX_RETRIES
      @retry_delay = DEFAULT_RETRY_DELAY
      @max_retry_delay = DEFAULT_MAX_RETRY_DELAY
      # The defaults are frozen; hand out mutable copies so callers can append.
      @retry_statuses = DEFAULT_RETRY_STATUSES.dup
      @retry_exceptions = DEFAULT_RETRY_EXCEPTIONS.dup
    end

    # Validates that required configuration is present.
    #
    # An empty or whitespace-only key is treated as missing, so that an
    # environment variable exported as an empty string fails here with a
    # clear message rather than later at request time.
    #
    # @raise [Exa::AuthenticationError] if API key is missing or blank
    # @return [void]
    def validate!
      key = api_key
      return if key && !key.to_s.strip.empty?

      raise AuthenticationError, "API key is required. Set via Exa.configure or EXA_API_KEY environment variable."
    end

    # Copy hook invoked by both #dup and #clone. Scalar settings are copied by
    # Ruby itself; the retry lists are duplicated here so that mutating a
    # copy's list never changes the list held by the source configuration.
    #
    # @param source [Configuration] configuration being copied
    # @return [void]
    def initialize_copy(source)
      super
      @retry_statuses = source.retry_statuses&.dup
      @retry_exceptions = source.retry_exceptions&.dup
    end
  end
end
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# typed: strict
|
|
4
|
+
|
|
5
|
+
module Exa
  module Endpoints
    # Mixin that adds the /answer endpoint. The including object must provide
    # a `post(path, params)` method.
    module Answer
      # Get an LLM answer to a question informed by Exa search results
      #
      # The /answer endpoint performs an Exa search and uses an LLM to generate either:
      # 1. A direct answer for specific queries (e.g., "What is the capital of France?" returns "Paris")
      # 2. A detailed summary with citations for open-ended queries
      #
      # Search options may be given either as flat keywords
      # (`num_results: 10`) or wrapped in a `search_options:` hash. When the
      # same option appears in both places, the flat keyword wins.
      #
      # @param query [String] The question or query to answer
      # @param text [Boolean] If true, includes full text content in search results
      # @param stream [Boolean] If true, `stream: true` is sent with the request
      # @param search_options [Hash] Additional search options to customize the underlying search
      # @option search_options [Symbol] :type Search type (:neural, :auto, :fast, :deep)
      # @option search_options [Integer] :num_results Number of results to use
      # @option search_options [Array<String>] :include_domains Domains to include
      # @option search_options [Array<String>] :exclude_domains Domains to exclude
      # @option search_options [String] :start_published_date Only results published after this date
      # @option search_options [String] :end_published_date Only results published before this date
      # @option search_options [String] :start_crawl_date Only results crawled after this date
      # @option search_options [String] :end_crawl_date Only results crawled before this date
      # @option search_options [Symbol, String] :category Category; underscores are sent as spaces
      #
      # @return [Exa::Resources::AnswerResponse] Answer with citations
      #
      # @raise [Exa::InvalidRequestError] if parameters are invalid
      # @raise [Exa::AuthenticationError] if API key is invalid
      # @raise [Exa::RateLimitError] if rate limit is exceeded
      #
      # @example Get a direct answer
      #   response = client.answer("What is the latest valuation of SpaceX?")
      #   puts response.answer
      #   response.citations.each { |c| puts "Source: #{c.url}" }
      #
      # @example Answer with search options
      #   response = client.answer(
      #     "What are the latest AI safety developments?",
      #     text: true,
      #     search_options: {
      #       num_results: 10,
      #       start_published_date: "2024-01-01T00:00:00.000Z"
      #     }
      #   )
      def answer(query, text: nil, stream: false, **search_options)
        raise InvalidRequestError, "query must be a non-empty string" if !query.is_a?(String) || query.empty?

        # `**search_options` captures a `search_options:` keyword as a nested
        # entry; unwrap it so the documented call style is honoured instead of
        # being silently ignored.
        nested = search_options.delete(:search_options)
        unless nested.nil?
          raise InvalidRequestError, "search_options must be a Hash" unless nested.is_a?(Hash)

          search_options = nested.merge(search_options)
        end

        params = build_answer_params(query, text, stream, search_options)
        response = post("/answer", params)

        Resources::AnswerResponse.new(
          Utils::ParameterConverter.from_api_response(response)
        )
      end

      private

      # Translates Ruby-style options into the camelCase request body.
      # Options that are nil or false are left out of the request.
      #
      # @param query [String] The question
      # @param text [Boolean, nil] Include text content
      # @param stream [Boolean] Stream response
      # @param search_options [Hash] Additional search options
      # @return [Hash] API-formatted parameters
      def build_answer_params(query, text, stream, search_options)
        params = { query: query }

        # `text: false` is forwarded explicitly; only nil means "not specified".
        params[:text] = text unless text.nil?
        params[:stream] = stream if stream

        params[:type] = search_options[:type].to_s if search_options[:type]
        params[:numResults] = search_options[:num_results] if search_options[:num_results]
        params[:includeDomains] = search_options[:include_domains] if search_options[:include_domains]
        params[:excludeDomains] = search_options[:exclude_domains] if search_options[:exclude_domains]

        if search_options[:start_published_date]
          params[:startPublishedDate] = Utils::ParameterConverter.format_date(search_options[:start_published_date])
        end

        if search_options[:end_published_date]
          params[:endPublishedDate] = Utils::ParameterConverter.format_date(search_options[:end_published_date])
        end

        if search_options[:start_crawl_date]
          params[:startCrawlDate] = Utils::ParameterConverter.format_date(search_options[:start_crawl_date])
        end

        if search_options[:end_crawl_date]
          params[:endCrawlDate] = Utils::ParameterConverter.format_date(search_options[:end_crawl_date])
        end

        # Categories are written with underscores in Ruby and sent with spaces.
        params[:category] = search_options[:category].to_s.tr("_", " ") if search_options[:category]

        params
      end
    end
  end
end
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# typed: strict
|
|
4
|
+
|
|
5
|
+
module Exa
  module Endpoints
    # Mixin that adds the /contents endpoint. The including object must
    # provide a `post(path, params)` method.
    module Contents
      VALID_LIVECRAWL_OPTIONS = %i[never fallback preferred always].freeze

      # Get the full page contents, summaries, and metadata for a list of URLs
      #
      # Returns instant results from Exa's cache, with automatic live crawling
      # as fallback for uncached pages.
      #
      # @param urls [Array<String>] URLs to fetch content from
      # @param text [Boolean, Hash] Return text content. Hash for options: { max_characters: 1000 }
      # @param highlights [Boolean, Hash] Return highlights. Hash for options: { num_sentences: 3, highlight_query: "..." }
      # @param summary [Boolean, Hash] Return summary. Hash for options: { query: "..." }
      # @param livecrawl [Symbol, String] Livecrawl option: :never, :fallback (default), :preferred, :always
      # @param livecrawl_timeout [Integer] Livecrawl timeout in milliseconds
      # @param subpages [Integer] Number of subpages to crawl
      # @param subpage_target [String, Array<String>] Terms to find specific subpages
      # @param context [Boolean, Integer] Return context string for LLM. Integer for max characters
      # @param extras [Hash] Extra options, converted to API parameter form and sent as `extras`
      #
      # @return [Exa::Resources::ContentsResponse] Contents results with statuses
      #
      # @raise [Exa::InvalidRequestError] if parameters are invalid
      # @raise [Exa::AuthenticationError] if API key is invalid
      # @raise [Exa::RateLimitError] if rate limit is exceeded
      #
      # @example Basic content retrieval
      #   client.get_contents(["https://example.com/article"])
      #
      # @example With text and summary
      #   client.get_contents(
      #     ["https://arxiv.org/abs/2307.06435"],
      #     text: true,
      #     summary: true
      #   )
      #
      # @example With livecrawl and subpages
      #   client.get_contents(
      #     ["https://example.com"],
      #     livecrawl: :preferred,
      #     subpages: 3,
      #     subpage_target: "about"
      #   )
      def get_contents(urls, **options)
        validate_contents_options!(urls, options)

        params = build_contents_params(urls, options)
        response = post("/contents", params)

        Resources::ContentsResponse.new(
          Utils::ParameterConverter.from_api_response(response)
        )
      end

      private

      # Checks the arguments before any request is made.
      #
      # @param urls [Array<String>] URLs to validate
      # @param options [Hash] Options to validate
      # @raise [Exa::InvalidRequestError] if validation fails
      def validate_contents_options!(urls, options)
        raise InvalidRequestError, "urls must be a non-empty array" if !urls.is_a?(Array) || urls.empty?

        urls.each do |url|
          raise InvalidRequestError, "Each URL must be a string" unless url.is_a?(String)
        end

        # Compare by string form: the value is sent as a string, so
        # "always" and :always are equally acceptable.
        livecrawl = options[:livecrawl]
        if livecrawl && !VALID_LIVECRAWL_OPTIONS.map(&:to_s).include?(livecrawl.to_s)
          raise InvalidRequestError, "Invalid livecrawl option: #{livecrawl}. Valid options: #{VALID_LIVECRAWL_OPTIONS.join(", ")}"
        end

        # Check the type before comparing, so a non-integer yields the
        # documented error rather than a comparison failure.
        subpages = options[:subpages]
        if subpages && !(subpages.is_a?(Integer) && subpages >= 0)
          raise InvalidRequestError, "subpages must be a non-negative integer"
        end
      end

      # @param urls [Array<String>] URLs to fetch
      # @param options [Hash] Content options
      # @return [Hash] API-formatted parameters
      def build_contents_params(urls, options)
        params = { urls: urls }

        add_contents_options!(params, options)

        params
      end

      # Copies the supported options into `params` under their API names.
      #
      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_contents_options!(params, options)
        # `key?` rather than truthiness, so an explicit `false` is forwarded.
        params[:text] = format_content_param(options[:text]) if options.key?(:text)
        params[:highlights] = format_highlights_param(options[:highlights]) if options.key?(:highlights)
        params[:summary] = format_summary_param(options[:summary]) if options.key?(:summary)

        if options.key?(:context)
          # An Integer is shorthand for a character limit.
          params[:context] = options[:context].is_a?(Integer) ? { maxCharacters: options[:context] } : options[:context]
        end

        params[:livecrawl] = options[:livecrawl].to_s if options[:livecrawl]
        params[:livecrawlTimeout] = options[:livecrawl_timeout] if options[:livecrawl_timeout]
        params[:subpages] = options[:subpages] if options[:subpages]
        params[:subpageTarget] = options[:subpage_target] if options[:subpage_target]

        add_extras!(params, options) if options[:extras]
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Content option
      # @return [Boolean, Hash] Formatted option
      def format_content_param(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Highlights option
      # @return [Boolean, Hash] Formatted option
      def format_highlights_param(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Summary option
      # @return [Boolean, Hash] Formatted option
      def format_summary_param(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_extras!(params, options)
        params[:extras] = Utils::ParameterConverter.to_api_params(options[:extras])
      end
    end
  end
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# typed: strict
|
|
4
|
+
|
|
5
|
+
require "erb"

module Exa
  module Endpoints
    # Mixin that adds the Websets event endpoints. The including object must
    # provide a `websets_get(path, params)` method.
    module Events
      # Event type names. No method in this module references the list; it is
      # kept for any code elsewhere that uses it.
      VALID_EVENT_TYPES = %w[
        webset.created webset.deleted webset.paused webset.idle
        webset.search.created webset.search.canceled webset.search.completed webset.search.updated
        import.created import.completed
        webset.item.created webset.item.enriched
        monitor.created monitor.updated monitor.deleted
        monitor.run.created monitor.run.completed
        webset.export.created webset.export.completed
      ].freeze

      # Gets an Event by ID
      #
      # @param event_id [String] Event ID
      # @return [Exa::Resources::Event] The Event
      # @raise [Exa::InvalidRequestError] if event_id is not a non-empty string
      #
      # @example Get an event
      #   event = client.get_event("evt_abc123")
      #   puts "Type: #{event.type}"
      #   puts "Data: #{event.data}"
      def get_event(event_id)
        raise InvalidRequestError, "event_id must be a non-empty string" if !event_id.is_a?(String) || event_id.empty?

        # Percent-encode the id so characters such as "/", "?" or "#" stay
        # inside this path segment instead of changing the request target.
        response = websets_get("/events/#{ERB::Util.url_encode(event_id)}")

        Resources::Event.new(
          Utils::ParameterConverter.from_api_response(response)
        )
      end

      # Lists Events with optional filtering. Filters left as nil are not
      # included in the request.
      #
      # @param cursor [String, nil] Cursor for pagination
      # @param limit [Integer, nil] Number of results per page
      # @param types [Array<String>, nil] Filter by event types
      # @param webset_id [String, nil] Filter by Webset ID
      # @param after [String, Time, nil] Events after this timestamp
      # @param before [String, Time, nil] Events before this timestamp
      #
      # @return [Exa::Resources::EventListResponse] Paginated list of Events
      #
      # @example List recent item events
      #   response = client.list_events(
      #     types: ["webset.item.created", "webset.item.enriched"],
      #     limit: 50
      #   )
      #   response.data.each { |e| puts "#{e.type} at #{e.created_at}" }
      def list_events(cursor: nil, limit: nil, types: nil, webset_id: nil, after: nil, before: nil)
        params = {}
        params[:cursor] = cursor if cursor
        params[:limit] = limit if limit
        params[:types] = types if types
        params[:websetId] = webset_id if webset_id
        params[:after] = Utils::ParameterConverter.format_date(after) if after
        params[:before] = Utils::ParameterConverter.format_date(before) if before

        response = websets_get("/events", params)

        Resources::EventListResponse.new(
          Utils::ParameterConverter.from_api_response(response)
        )
      end
    end
  end
end
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# typed: strict
|
|
4
|
+
|
|
5
|
+
module Exa
  module Endpoints
    # Mixin that adds the /findSimilar endpoint. The including object must
    # provide a `post(path, params)` method.
    module FindSimilar
      # Not referenced by any method in this module; kept for any code
      # elsewhere that uses it.
      VALID_LIVECRAWL_OPTIONS = %i[never fallback preferred always].freeze

      # Find similar links to the provided URL
      #
      # Uses Exa's embeddings-based model to find pages that are semantically
      # similar to the source URL. Optionally returns the contents of found pages.
      #
      # @param url [String] The source URL to find similar links for
      # @param num_results [Integer] Number of results to return (max 100)
      # @param include_domains [Array<String>] Domains to include in results
      # @param exclude_domains [Array<String>] Domains to exclude from results
      # @param start_crawl_date [String, Time] Results crawled after this date (ISO 8601)
      # @param end_crawl_date [String, Time] Results crawled before this date (ISO 8601)
      # @param start_published_date [String, Time] Results published after this date (ISO 8601)
      # @param end_published_date [String, Time] Results published before this date (ISO 8601)
      # @param include_text [Array<String>] Strings that must be present in page text
      # @param exclude_text [Array<String>] Strings that must not be present in page text
      # @param text [Boolean, Hash] Return text content. Hash for options: { max_characters: 1000 }
      # @param highlights [Boolean, Hash] Return highlights. Hash for options: { num_sentences: 3, highlight_query: "..." }
      # @param summary [Boolean, Hash] Return summary. Hash for options: { query: "..." }
      # @param context [Boolean, Integer] Return context string for LLM. Integer for max characters
      # @param moderation [Boolean] Enable content moderation
      #
      # @return [Exa::Resources::SearchResponse] Similar links results
      #
      # @raise [Exa::InvalidRequestError] if parameters are invalid
      # @raise [Exa::AuthenticationError] if API key is invalid
      # @raise [Exa::RateLimitError] if rate limit is exceeded
      #
      # @example Basic similar links search
      #   client.find_similar("https://arxiv.org/abs/2307.06435")
      #
      # @example With content and filters
      #   client.find_similar(
      #     "https://arxiv.org/abs/2307.06435",
      #     text: true,
      #     num_results: 20,
      #     include_domains: ["arxiv.org", "paperswithcode.com"]
      #   )
      def find_similar(url, **options)
        validate_find_similar_options!(url, options)

        params = build_find_similar_params(url, options)
        response = post("/findSimilar", params)

        Resources::SearchResponse.new(
          Utils::ParameterConverter.from_api_response(response)
        )
      end

      private

      # Checks the arguments before any request is made.
      #
      # @param url [String] URL to validate
      # @param options [Hash] Options to validate
      # @raise [Exa::InvalidRequestError] if validation fails
      def validate_find_similar_options!(url, options)
        raise InvalidRequestError, "url must be a non-empty string" if !url.is_a?(String) || url.empty?

        # Check the type before the range, so a non-integer yields the
        # documented error rather than a comparison failure.
        num_results = options[:num_results]
        if num_results && !(num_results.is_a?(Integer) && num_results.between?(1, 100))
          raise InvalidRequestError, "num_results must be between 1 and 100"
        end
      end

      # @param url [String] Source URL
      # @param options [Hash] Search options
      # @return [Hash] API-formatted parameters
      def build_find_similar_params(url, options)
        params = { url: url }

        params[:numResults] = options[:num_results] if options[:num_results]
        # `key?` rather than truthiness, so `moderation: false` is forwarded.
        params[:moderation] = options[:moderation] if options.key?(:moderation)

        add_find_similar_domain_filters!(params, options)
        add_find_similar_date_filters!(params, options)
        add_find_similar_text_filters!(params, options)
        add_find_similar_content_options!(params, options)

        params
      end

      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_find_similar_domain_filters!(params, options)
        params[:includeDomains] = options[:include_domains] if options[:include_domains]
        params[:excludeDomains] = options[:exclude_domains] if options[:exclude_domains]
      end

      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_find_similar_date_filters!(params, options)
        params[:startCrawlDate] = format_find_similar_date(options[:start_crawl_date]) if options[:start_crawl_date]
        params[:endCrawlDate] = format_find_similar_date(options[:end_crawl_date]) if options[:end_crawl_date]
        params[:startPublishedDate] = format_find_similar_date(options[:start_published_date]) if options[:start_published_date]
        params[:endPublishedDate] = format_find_similar_date(options[:end_published_date]) if options[:end_published_date]
      end

      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_find_similar_text_filters!(params, options)
        params[:includeText] = options[:include_text] if options[:include_text]
        params[:excludeText] = options[:exclude_text] if options[:exclude_text]
      end

      # @param params [Hash] Parameters to modify
      # @param options [Hash] Source options
      def add_find_similar_content_options!(params, options)
        params[:text] = format_find_similar_content_option(options[:text]) if options.key?(:text)
        params[:highlights] = format_find_similar_highlights_option(options[:highlights]) if options.key?(:highlights)
        params[:summary] = format_find_similar_summary_option(options[:summary]) if options.key?(:summary)

        if options.key?(:context)
          # An Integer is shorthand for a character limit.
          params[:context] = options[:context].is_a?(Integer) ? { maxCharacters: options[:context] } : options[:context]
        end
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Content option
      # @return [Boolean, Hash] Formatted option
      def format_find_similar_content_option(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Highlights option
      # @return [Boolean, Hash] Formatted option
      def format_find_similar_highlights_option(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # Booleans pass through unchanged; anything else goes through the
      # parameter converter.
      #
      # @param option [Boolean, Hash] Summary option
      # @return [Boolean, Hash] Formatted option
      def format_find_similar_summary_option(option)
        return option if option.is_a?(TrueClass) || option.is_a?(FalseClass)

        Utils::ParameterConverter.to_api_params(option)
      end

      # @param value [Time, Date, String, nil] Date value
      # @return [String, nil] ISO 8601 formatted string
      def format_find_similar_date(value)
        Utils::ParameterConverter.format_date(value)
      end
    end
  end
end
|