exa-ai 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ca5a6bcb0b981d51fcc93e1dc9f1e7037d1d6346e082826e1b1c003d586cce45
4
- data.tar.gz: e598e0c91c2815a5abea958adb9f5112def140b78f13ba29872d07009c4056cc
3
+ metadata.gz: 5cfcc57c0090503ea4b27748c7ade968fea3768f0793bd4939fd4debfb8bd7e0
4
+ data.tar.gz: 2cff698538f70030cfccdcd3a91305e4696e8f5d78c7c0d4b110e44625109a6e
5
5
  SHA512:
6
- metadata.gz: 4b648ebe500a28dbb8d31aa936fd0d30a09ff94c372a3f3caec463d648993ae26473fc9724aea0157395b8e78ac9a25fc3f2752c8d6fd1cc68745bb8758b0ca7
7
- data.tar.gz: 512d79543838b4b82ba263586c9cd029f8e19f350ff9c744b85dcb5b8f4e62236b442b70d07316721bc844f6ca9ac79bac6c1b42d57d5f0ef1214c5be407818e
6
+ metadata.gz: 4bd5ff3554ae0b7b8159411e516531a7b3bdb5ae1249c95a88a8522cfb7c0c51d904e41e2fb5d3b1e3f19c9f61ab3f154c53e76e05aa7220971ec4e160edf141
7
+ data.tar.gz: 56233afd37573951715d5747585e3fb3ae59c3df38236d75a9f75a31459a38fa459386702487fbf01217fae72aa833b61a2a0968b903f19ab7f88e5d45a41cbd
data/README.md CHANGED
@@ -214,7 +214,7 @@ exa-ai search "tutorials" \
214
214
  exa-ai search "AI" --output-format pretty
215
215
  ```
216
216
 
217
- **Options:**
217
+ **Basic Options:**
218
218
  - `QUERY` - Search query (required)
219
219
  - `--num-results N` - Number of results (default: 10)
220
220
  - `--type TYPE` - Search type: keyword, neural, or auto (default: auto)
@@ -224,6 +224,141 @@ exa-ai search "AI" --output-format pretty
224
224
  - `--output-format FORMAT` - json or pretty (default: json)
225
225
  - `--api-key KEY` - API key (or set EXA_API_KEY env var)
226
226
 
227
+ #### Advanced Search Options
228
+
229
+ **Date Filtering:**
230
+ ```bash
231
+ # Filter by published date
232
+ exa-ai search "AI research" \
233
+ --start-published-date "2025-01-01T00:00:00.000Z" \
234
+ --end-published-date "2025-12-31T23:59:59.999Z"
235
+
236
+ # Filter by crawl date
237
+ exa-ai search "news" \
238
+ --start-crawl-date "2025-10-01T00:00:00.000Z" \
239
+ --end-crawl-date "2025-10-31T23:59:59.999Z"
240
+ ```
241
+
242
+ **Text Filtering:**
243
+ ```bash
244
+ # Results must include specific phrase
245
+ exa-ai search "machine learning" --include-text "neural networks"
246
+
247
+ # Results must exclude specific phrase
248
+ exa-ai search "programming" --exclude-text "paid-partnership"
249
+
250
+ # Combine inclusion and exclusion
251
+ exa-ai search "Python" \
252
+ --include-text "open source" \
253
+ --exclude-text "deprecated"
254
+ ```
255
+
256
+ **Content Extraction:**
257
+ ```bash
258
+ # Extract full webpage text
259
+ exa-ai search "Ruby" --text
260
+
261
+ # Extract text with options
262
+ exa-ai search "AI" \
263
+ --text \
264
+ --text-max-characters 3000 \
265
+ --include-html-tags
266
+
267
+ # Generate AI summaries
268
+ exa-ai search "climate change" \
269
+ --summary \
270
+ --summary-query "What are the main points?"
271
+
272
+ # Format results as context for LLM RAG
273
+ exa-ai search "kubernetes" \
274
+ --context \
275
+ --context-max-characters 5000
276
+
277
+ # Crawl subpages
278
+ exa-ai search "documentation" \
279
+ --subpages 1 \
280
+ --subpage-target about \
281
+ --subpage-target docs
282
+
283
+ # Extract links from results
284
+ exa-ai search "web development" \
285
+ --links 3 \
286
+ --image-links 2
287
+ ```
288
+
289
+ **Advanced Ruby API:**
290
+ ```ruby
291
+ client = Exa::Client.new(api_key: "your-key")
292
+
293
+ # Date range filtering
294
+ results = client.search("AI research",
295
+ start_published_date: "2025-01-01T00:00:00.000Z",
296
+ end_published_date: "2025-12-31T23:59:59.999Z"
297
+ )
298
+
299
+ # Text filtering
300
+ results = client.search("machine learning",
301
+ include_text: ["neural networks"],
302
+ exclude_text: ["cryptocurrency"]
303
+ )
304
+
305
+ # Full webpage text extraction
306
+ results = client.search("Ruby",
307
+ text: {
308
+ max_characters: 3000,
309
+ include_html_tags: true
310
+ }
311
+ )
312
+
313
+ # AI-powered summaries
314
+ results = client.search("climate change",
315
+ summary: {
316
+ query: "What are the main points?"
317
+ }
318
+ )
319
+
320
+ # Context for RAG pipelines
321
+ results = client.search("kubernetes",
322
+ context: {
323
+ max_characters: 5000
324
+ }
325
+ )
326
+
327
+ # Subpage crawling
328
+ results = client.search("documentation",
329
+ subpages: 1,
330
+ subpage_target: ["about", "docs", "guide"]
331
+ )
332
+
333
+ # Links and image extraction
334
+ results = client.search("web development",
335
+ extras: {
336
+ links: 3,
337
+ image_links: 2
338
+ }
339
+ )
340
+
341
+ # Combine multiple features
342
+ results = client.search("AI",
343
+ num_results: 5,
344
+ start_published_date: "2025-01-01T00:00:00.000Z",
345
+ text: { max_characters: 3000 },
346
+ summary: { query: "Main developments?" },
347
+ context: { max_characters: 5000 },
348
+ subpages: 1,
349
+ subpage_target: ["research"],
350
+ extras: { links: 3, image_links: 2 }
351
+ )
352
+
353
+ # Access extracted content
354
+ results.results.each do |result|
355
+ puts result["title"]
356
+ puts result["text"] if result["text"] # Full webpage text
357
+ puts result["summary"] if result["summary"] # AI summary
358
+ puts result["links"] if result["links"] # Extracted links
359
+ end
360
+ ```
361
+
227
362
  ### Answer Command
228
363
 
229
364
  Generate comprehensive answers to questions using Exa's answer generation feature:
data/exe/exa-ai-search CHANGED
@@ -24,6 +24,15 @@ def parse_args(argv)
24
24
  when "--type"
25
25
  args[:type] = argv[i + 1]
26
26
  i += 2
27
+ when "--category"
28
+ category = argv[i + 1]
29
+ valid_categories = ["company", "research paper", "news", "pdf", "github", "tweet", "personal site", "linkedin profile", "financial report"]
30
+ unless valid_categories.include?(category)
31
+ $stderr.puts "Error: Category must be one of: #{valid_categories.map { |c| "\"#{c}\"" }.join(', ')}"
32
+ exit 1
33
+ end
34
+ args[:category] = category
35
+ i += 2
27
36
  when "--include-domains"
28
37
  args[:include_domains] = argv[i + 1].split(",").map(&:strip)
29
38
  i += 2
@@ -39,6 +48,77 @@ def parse_args(argv)
39
48
  when "--output-format"
40
49
  args[:output_format] = argv[i + 1]
41
50
  i += 2
51
+ when "--linkedin"
52
+ linkedin_type = argv[i + 1]
53
+ valid_types = ["company", "person", "all"]
54
+ unless valid_types.include?(linkedin_type)
55
+ $stderr.puts "Error: LinkedIn type must be one of: #{valid_types.join(', ')}"
56
+ exit 1
57
+ end
58
+ args[:linkedin] = linkedin_type
59
+ i += 2
60
+ when "--start-published-date"
61
+ args[:start_published_date] = argv[i + 1]
62
+ i += 2
63
+ when "--end-published-date"
64
+ args[:end_published_date] = argv[i + 1]
65
+ i += 2
66
+ when "--start-crawl-date"
67
+ args[:start_crawl_date] = argv[i + 1]
68
+ i += 2
69
+ when "--end-crawl-date"
70
+ args[:end_crawl_date] = argv[i + 1]
71
+ i += 2
72
+ when "--include-text"
73
+ args[:include_text] ||= []
74
+ args[:include_text] << argv[i + 1]
75
+ i += 2
76
+ when "--exclude-text"
77
+ args[:exclude_text] ||= []
78
+ args[:exclude_text] << argv[i + 1]
79
+ i += 2
80
+ when "--text"
81
+ args[:text] = true
82
+ i += 1
83
+ when "--text-max-characters"
84
+ args[:text_max_characters] = argv[i + 1].to_i
85
+ i += 2
86
+ when "--include-html-tags"
87
+ args[:include_html_tags] = true
88
+ i += 1
89
+ when "--summary"
90
+ args[:summary] = true
91
+ i += 1
92
+ when "--summary-query"
93
+ args[:summary_query] = argv[i + 1]
94
+ i += 2
95
+ when "--summary-schema"
96
+ schema_arg = argv[i + 1]
97
+ args[:summary_schema] = if schema_arg.start_with?("@")
98
+ JSON.parse(File.read(schema_arg[1..]))
99
+ else
100
+ JSON.parse(schema_arg)
101
+ end
102
+ i += 2
103
+ when "--context"
104
+ args[:context] = true
105
+ i += 1
106
+ when "--context-max-characters"
107
+ args[:context_max_characters] = argv[i + 1].to_i
108
+ i += 2
109
+ when "--subpages"
110
+ args[:subpages] = argv[i + 1].to_i
111
+ i += 2
112
+ when "--subpage-target"
113
+ args[:subpage_target] ||= []
114
+ args[:subpage_target] << argv[i + 1]
115
+ i += 2
116
+ when "--links"
117
+ args[:links] = argv[i + 1].to_i
118
+ i += 2
119
+ when "--image-links"
120
+ args[:image_links] = argv[i + 1].to_i
121
+ i += 2
42
122
  when "--help", "-h"
43
123
  puts <<~HELP
44
124
  Usage: exa-api search QUERY [OPTIONS]
@@ -49,18 +129,50 @@ def parse_args(argv)
49
129
  QUERY Search query (required)
50
130
 
51
131
  Options:
52
- --num-results N Number of results to return (default: 10)
53
- --type TYPE Search type: keyword, neural, or auto (default: auto)
54
- --include-domains D Comma-separated list of domains to include
55
- --exclude-domains D Comma-separated list of domains to exclude
56
- --use-autoprompt Use Exa's autoprompt feature
57
- --api-key KEY Exa API key (or set EXA_API_KEY env var)
58
- --output-format FMT Output format: json, pretty, or text (default: json)
59
- --help, -h Show this help message
132
+ --num-results N Number of results to return (default: 10)
133
+ --type TYPE Search type: keyword, neural, fast, or auto (default: auto)
134
+ --category CAT Focus on specific data category
135
+ Options: "company", "research paper", "news", "pdf",
136
+ "github", "tweet", "personal site", "linkedin profile",
137
+ "financial report"
138
+ --include-domains D Comma-separated list of domains to include
139
+ --exclude-domains D Comma-separated list of domains to exclude
140
+ --start-published-date DATE Filter by published date (ISO 8601 format)
141
+ --end-published-date DATE Filter by published date (ISO 8601 format)
142
+ --start-crawl-date DATE Filter by crawl date (ISO 8601 format)
143
+ --end-crawl-date DATE Filter by crawl date (ISO 8601 format)
144
+ --include-text PHRASE Include results with exact phrase (repeatable)
145
+ --exclude-text PHRASE Exclude results with exact phrase (repeatable)
146
+
147
+ Content Extraction:
148
+ --text Include full webpage text
149
+ --text-max-characters N Max characters for webpage text
150
+ --include-html-tags Include HTML tags in text extraction
151
+ --summary Include AI-generated summary
152
+ --summary-query PROMPT Custom prompt for summary generation
153
+ --summary-schema FILE JSON schema for summary structure (@file syntax)
154
+ --context Format results as context for LLM RAG
155
+ --context-max-characters N Max characters for context string
156
+ --subpages N Number of subpages to crawl
157
+ --subpage-target PHRASE Subpage target phrases (repeatable)
158
+ --links N Number of links to extract per result
159
+ --image-links N Number of image links to extract
160
+
161
+ General Options:
162
+ --use-autoprompt Use Exa's autoprompt feature
163
+ --linkedin TYPE Search LinkedIn: company, person, or all
164
+ --api-key KEY Exa API key (or set EXA_API_KEY env var)
165
+ --output-format FMT Output format: json, pretty, or text (default: json)
166
+ --help, -h Show this help message
60
167
 
61
168
  Examples:
62
169
  exa-api search "ruby programming"
63
170
  exa-api search "machine learning" --num-results 5 --type keyword
171
+ exa-api search "Latest LLM research" --category "research paper"
172
+ exa-api search "AI startups" --category company
173
+ exa-api search "Anthropic" --linkedin company
174
+ exa-api search "Dario Amodei" --linkedin person
175
+ exa-api search "AI" --linkedin all
64
176
  exa-api search "AI research" --include-domains arxiv.org,scholar.google.com
65
177
  exa-api search "tutorials" --output-format pretty
66
178
  HELP
@@ -75,6 +187,55 @@ def parse_args(argv)
75
187
  args
76
188
  end
77
189
 
190
# Assemble the content-extraction parameters from the parsed CLI flags.
#
# Sub-options such as :text_max_characters only take effect when their
# enabling flag (:text, :summary, :context) is set. An enabled flag with no
# sub-options is represented as +true+; otherwise it becomes a hash holding
# only the sub-options that were supplied.
#
# @param args [Hash] parsed command-line arguments keyed by symbol
# @return [Hash, nil] content parameters, or nil when none were requested
def build_contents(args)
  # Drop every option whose value is nil/false.
  supplied = ->(pairs) { pairs.select { |_name, value| value } }
  # Bare flag => true, flag with sub-options => the sub-option hash.
  flag_or_options = lambda do |pairs|
    options = supplied.call(pairs)
    options.empty? ? true : options
  end

  payload = {}

  if args[:text]
    payload[:text] = flag_or_options.call(
      max_characters: args[:text_max_characters],
      include_html_tags: args[:include_html_tags]
    )
  end

  if args[:summary]
    payload[:summary] = flag_or_options.call(
      query: args[:summary_query],
      schema: args[:summary_schema]
    )
  end

  if args[:context]
    payload[:context] = flag_or_options.call(max_characters: args[:context_max_characters])
  end

  # Subpage settings are copied through whenever they are present.
  %i[subpages subpage_target].each do |name|
    payload[name] = args[name] if args[name]
  end

  # Extras are only emitted when at least one link option was given.
  extras = supplied.call(links: args[:links], image_links: args[:image_links])
  payload[:extras] = extras unless extras.empty?

  payload unless payload.empty?
end
238
+
78
239
  # Main execution
79
240
  begin
80
241
  args = parse_args(ARGV)
@@ -97,14 +258,32 @@ begin
97
258
 
98
259
  # Prepare search parameters
99
260
  search_params = {}
100
- search_params[:num_results] = args[:num_results] if args[:num_results]
261
+ search_params[:numResults] = args[:num_results] if args[:num_results]
101
262
  search_params[:type] = args[:type] if args[:type]
102
- search_params[:include_domains] = args[:include_domains] if args[:include_domains]
103
- search_params[:exclude_domains] = args[:exclude_domains] if args[:exclude_domains]
104
- search_params[:use_autoprompt] = args[:use_autoprompt] if args[:use_autoprompt]
263
+ search_params[:category] = args[:category] if args[:category]
264
+ search_params[:includeDomains] = args[:include_domains] if args[:include_domains]
265
+ search_params[:excludeDomains] = args[:exclude_domains] if args[:exclude_domains]
266
+ search_params[:start_published_date] = args[:start_published_date] if args[:start_published_date]
267
+ search_params[:end_published_date] = args[:end_published_date] if args[:end_published_date]
268
+ search_params[:start_crawl_date] = args[:start_crawl_date] if args[:start_crawl_date]
269
+ search_params[:end_crawl_date] = args[:end_crawl_date] if args[:end_crawl_date]
270
+ search_params[:include_text] = args[:include_text] if args[:include_text]
271
+ search_params[:exclude_text] = args[:exclude_text] if args[:exclude_text]
272
+ contents = build_contents(args)
273
+ search_params.merge!(contents) if contents
274
+ search_params[:useAutoprompt] = args[:use_autoprompt] if args[:use_autoprompt]
105
275
 
106
- # Execute search
107
- result = client.search(args[:query], **search_params)
276
+ # Execute search based on LinkedIn type
277
+ result = case args[:linkedin]
278
+ when "company"
279
+ client.linkedin_company(args[:query], **search_params)
280
+ when "person"
281
+ client.linkedin_person(args[:query], **search_params)
282
+ when "all"
283
+ client.search(args[:query], includeDomains: ["linkedin.com"], **search_params)
284
+ else
285
+ client.search(args[:query], **search_params)
286
+ end
108
287
 
109
288
  # Format and output result
110
289
  output = Exa::CLI::Formatters::SearchFormatter.format(result, output_format)
data/lib/exa/client.rb CHANGED
@@ -107,6 +107,32 @@ module Exa
107
107
  Services::Context.new(connection, query: query, **params).call
108
108
  end
109
109
 
110
# Search for LinkedIn company pages.
#
# Thin wrapper around #search that defaults to keyword search restricted to
# the "linkedin.com/company" domain. Any key supplied in +params+
# (including :type or :includeDomains) takes precedence over these defaults.
#
# @param query [String] Company name to search
# @param params [Hash] Additional search parameters
# @option params [Integer] :numResults Number of results to return
# @return [Resources::SearchResult] LinkedIn company results
def linkedin_company(query, **params)
  defaults = { type: "keyword", includeDomains: ["linkedin.com/company"] }
  search(query, **defaults.merge(params))
end
122
+
123
# Search for LinkedIn profiles of individuals.
#
# Thin wrapper around #search that defaults to keyword search restricted to
# the "linkedin.com/in" domain. Any key supplied in +params+ (including
# :type or :includeDomains) takes precedence over these defaults.
#
# @param query [String] Person name to search
# @param params [Hash] Additional search parameters
# @option params [Integer] :numResults Number of results to return
# @return [Resources::SearchResult] LinkedIn profile results
def linkedin_person(query, **params)
  defaults = { type: "keyword", includeDomains: ["linkedin.com/in"] }
  search(query, **defaults.merge(params))
end
135
+
110
136
  private
111
137
 
112
138
  def connection
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
module Exa
  module Services
    # Converts Ruby parameter names (snake_case) to API format (camelCase).
    #
    # Top-level search parameters are renamed in place; content-extraction
    # parameters (text, summary, context, subpages, subpage_target, extras)
    # are collected under a nested +:contents+ key. Keys without a mapping,
    # including keys that are already camelCase, pass through unchanged.
    class ParameterConverter
      # snake_case => camelCase names for top-level search parameters.
      TOP_LEVEL_KEYS = {
        num_results: :numResults,
        include_domains: :includeDomains,
        exclude_domains: :excludeDomains,
        use_autoprompt: :useAutoprompt,
        start_published_date: :startPublishedDate,
        end_published_date: :endPublishedDate,
        start_crawl_date: :startCrawlDate,
        end_crawl_date: :endCrawlDate,
        include_text: :includeText,
        exclude_text: :excludeText
      }.freeze

      # Parameters that are moved under +:contents+, with their API names.
      CONTENT_KEYS = {
        text: :text,
        summary: :summary,
        context: :context,
        subpages: :subpages,
        subpage_target: :subpageTarget,
        extras: :extras
      }.freeze

      # Per-content-key renames applied to the keys of a Hash value.
      # Content keys absent from this table have their values passed through.
      NESTED_KEYS = {
        text: { max_characters: :maxCharacters, include_html_tags: :includeHtmlTags }.freeze,
        summary: {}.freeze,
        context: { max_characters: :maxCharacters }.freeze,
        extras: { image_links: :imageLinks }.freeze
      }.freeze

      # Convenience entry point; see #convert.
      #
      # @param params [Hash] search parameters keyed by symbol
      # @return [Hash] converted parameters
      def self.convert(params)
        new.convert(params)
      end

      # Build the request parameters from Ruby-style search parameters.
      #
      # If both the snake_case and camelCase spelling of the same parameter
      # are given, whichever appears later in +params+ wins.
      #
      # @param params [Hash] search parameters keyed by symbol
      # @return [Hash] a new hash with converted keys; content parameters are
      #   nested under +:contents+ (omitted when there are none)
      def convert(params)
        top_level = {}
        contents = {}

        params.each do |key, value|
          if CONTENT_KEYS.key?(key)
            contents[CONTENT_KEYS[key]] = convert_content_value(key, value)
          else
            top_level[TOP_LEVEL_KEYS.fetch(key, key)] = value
          end
        end

        top_level[:contents] = contents unless contents.empty?
        top_level
      end

      private

      # Rename the keys of a Hash-valued content option; booleans, integers,
      # arrays and any other non-Hash values are returned as-is.
      def convert_content_value(key, value)
        mappings = NESTED_KEYS[key]
        return value unless mappings && value.is_a?(Hash)

        value.each_with_object({}) do |(name, nested), renamed|
          renamed[mappings.fetch(name, name)] = nested
        end
      end
    end
  end
end
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "parameter_converter"
4
+
3
5
  module Exa
4
6
  module Services
5
7
  class Search
@@ -9,7 +11,7 @@ module Exa
9
11
  end
10
12
 
11
13
  def call
12
- response = @connection.post("/search", @params)
14
+ response = @connection.post("/search", ParameterConverter.convert(@params))
13
15
  body = response.body
14
16
 
15
17
  Resources::SearchResult.new(
data/lib/exa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Exa
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: exa-ai
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Jackson
@@ -142,6 +142,7 @@ files:
142
142
  - lib/exa/services/context.rb
143
143
  - lib/exa/services/find_similar.rb
144
144
  - lib/exa/services/get_contents.rb
145
+ - lib/exa/services/parameter_converter.rb
145
146
  - lib/exa/services/research_get.rb
146
147
  - lib/exa/services/research_list.rb
147
148
  - lib/exa/services/research_start.rb