exa-ai 0.6.0 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ module Exa
9
9
  when "json"
10
10
  JSON.generate(item)
11
11
  when "pretty"
12
- JSON.pretty_generate(item)
12
+ format_as_pretty(item)
13
13
  when "text"
14
14
  format_as_text(item)
15
15
  when "toon"
@@ -19,21 +19,42 @@ module Exa
19
19
  end
20
20
  end
21
21
 
22
- def self.format_collection(items, output_format)
22
+ def self.format_collection(collection, output_format)
23
23
  case output_format
24
24
  when "json"
25
- JSON.generate(items)
25
+ JSON.generate(collection.to_h)
26
26
  when "pretty"
27
- JSON.pretty_generate(items)
27
+ format_collection_as_pretty(collection)
28
28
  when "text"
29
- format_collection_as_text(items)
29
+ format_collection_as_text(collection)
30
30
  when "toon"
31
- Exa::CLI::Base.encode_as_toon(items)
31
+ Exa::CLI::Base.encode_as_toon(collection.to_h)
32
32
  else
33
33
  raise ArgumentError, "Unknown output format: #{output_format}"
34
34
  end
35
35
  end
36
36
 
37
# Renders a single webset item as a multi-line, human-readable summary.
#
# The item ID line is always emitted; every other field only appears when
# the item carries a truthy value for it. When an "entity" is present, a
# blank line and an "Entity:" section listing its type, name and
# description follow.
#
# @param item [Hash] item indexed with string keys ("id", "url", "title",
#   "status", "createdAt", "updatedAt", "entity")
# @return [String] the summary, lines joined with "\n"
def self.format_as_pretty(item)
  top_level_fields = [
    %w[URL url],
    %w[Title title],
    %w[Status status],
    %w[Created createdAt],
    %w[Updated updatedAt]
  ]
  entity_fields = [
    %w[Type type],
    %w[Name name],
    %w[Description description]
  ]

  summary = ["Item ID: #{item['id']}"]
  top_level_fields.each do |label, key|
    value = item[key]
    summary << "#{label}: #{value}" if value
  end

  entity = item['entity']
  if entity
    # Blank separator line, then the section heading.
    summary.push("", "Entity:")
    entity_fields.each do |label, key|
      detail = entity[key]
      summary << " #{label}: #{detail}" if detail
    end
  end

  summary.join("\n")
end
56
+ private_class_method :format_as_pretty
57
+
37
58
  def self.format_as_text(item)
38
59
  lines = []
39
60
  lines << "Item: #{item['id']}"
@@ -53,9 +74,41 @@ module Exa
53
74
  end
54
75
  private_class_method :format_as_text
55
76
 
56
- def self.format_collection_as_text(items)
57
- lines = ["Items (#{items.length} total):"]
58
- items.each_with_index do |item, idx|
77
# Renders a page of webset items as a human-readable listing.
#
# Output starts with a count header, followed by the next cursor when the
# collection reports more pages, then a blank line. Each item is printed as
# a block of "Label: value" lines, with a blank line between consecutive
# items. For the entity, the name is preferred; the type is only shown when
# no name is available.
#
# @param collection [#data, #has_more, #next_cursor] page of items; each
#   item is indexed with string keys
# @return [String] the listing, lines joined with "\n"
def self.format_collection_as_pretty(collection)
  items = collection.data
  field_labels = [
    %w[URL url],
    %w[Title title],
    %w[Status status],
    %w[Created createdAt],
    %w[Updated updatedAt]
  ]

  output = ["Webset Items (#{items.length} items)"]
  output << "Next Cursor: #{collection.next_cursor}" if collection.has_more
  output << ""

  items.each_with_index do |item, position|
    # Blank line between items
    output << "" unless position.zero?

    output << "Item ID: #{item['id']}"
    field_labels.each do |label, key|
      value = item[key]
      output << "#{label}: #{value}" if value
    end

    entity = item['entity']
    next unless entity

    name = entity['name']
    type = entity['type']
    if name
      output << "Entity: #{name}"
    elsif type
      output << "Entity Type: #{type}"
    end
  end

  output.join("\n")
end
107
+ private_class_method :format_collection_as_pretty
108
+
109
+ def self.format_collection_as_text(collection)
110
+ lines = ["Webset Items (#{collection.data.length} items):"]
111
+ collection.data.each_with_index do |item, idx|
59
112
  lines << "\n#{idx + 1}. #{item['id']}"
60
113
  lines << " URL: #{item['url']}" if item['url']
61
114
  lines << " Title: #{item['title']}" if item['title']
@@ -64,6 +117,11 @@ module Exa
64
117
  lines << " Entity: #{item['entity']['name']}"
65
118
  end
66
119
  end
120
+
121
+ if collection.has_more
122
+ lines << "\nMore available (cursor: #{collection.next_cursor})"
123
+ end
124
+
67
125
  lines.join("\n")
68
126
  end
69
127
  private_class_method :format_collection_as_text
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Exa
  module CLI
    module Formatters
      # Renders a webset search resource for CLI output.
      class WebsetSearchFormatter
        # Formats a webset search in the requested output format.
        #
        # @param search [#to_h, #id, #status, #query, #entity, #count, #behavior,
        #   #recall, #created_at, #updated_at, #progress, #canceled?,
        #   #canceled_at, #canceled_reason] the search to render
        # @param format [String] "json", "pretty", "text" or "toon"; any other
        #   value falls back to pretty-printed JSON
        # @return [String] the rendered search
        def self.format(search, format)
          case format
          when "json"
            JSON.pretty_generate(search.to_h)
          when "pretty"
            format_pretty(search)
          when "text"
            format_text(search)
          when "toon"
            Exa::CLI::Base.encode_as_toon(search.to_h)
          else
            JSON.pretty_generate(search.to_h)
          end
        end

        # Builds the detailed, labelled view of a search. Optional attributes
        # (entity, count, recall, progress) are only printed when present, and
        # the cancellation details follow a blank line when the search was
        # canceled.
        #
        # @param search [Object] see {.format}
        # @return [String]
        def self.format_pretty(search)
          output = []
          output << "Search ID: #{search.id}"
          output << "Status: #{search.status}"
          output << "Query: #{search.query}"
          output << "Entity Type: #{search.entity['type'] || 'N/A'}" if search.entity
          output << "Count: #{search.count}" if search.count
          output << "Behavior: #{search.behavior}"
          output << "Recall: #{search.recall}" if search.recall
          output << "Created: #{search.created_at}"
          output << "Updated: #{search.updated_at}"
          output << "Progress: #{search.progress}" if search.progress
          output << ""

          if search.canceled?
            output << "Canceled: #{search.canceled_at}"
            output << "Cancel Reason: #{search.canceled_reason}" if search.canceled_reason
          end

          output.join("\n")
        end

        # Builds the compact four-line view of a search.
        #
        # @param search [Object] see {.format}
        # @return [String]
        def self.format_text(search)
          [
            "ID: #{search.id}",
            "Status: #{search.status}",
            "Query: #{search.query}",
            "Behavior: #{search.behavior}"
          ].join("\n")
        end

        # A bare `private` does not apply to `def self.` methods, so the
        # helpers are hidden explicitly.
        private_class_method :format_pretty, :format_text
      end
    end
  end
end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
module Exa
  module CLI
    # Turns the raw argument vector of the search command into an options hash.
    #
    # Recognised flags are consumed together with their value (when they take
    # one). Every other argument is treated as part of the query; the parts are
    # joined with single spaces.
    class SearchParser
      VALID_SEARCH_TYPES = ["fast", "deep", "keyword", "auto"].freeze
      VALID_CATEGORIES = [
        "company", "research paper", "news", "pdf", "github",
        "tweet", "personal site", "financial report", "people"
      ].freeze

      # Flags that take no value and simply switch an option on.
      BOOLEAN_FLAGS = {
        "--text" => :text,
        "--include-html-tags" => :include_html_tags,
        "--summary" => :summary,
        "--context" => :context
      }.freeze

      # Flags that consume the following argument, mapped to the option key
      # they populate and to how the raw value is converted:
      #   :string      stored verbatim
      #   :integer     converted with String#to_i
      #   :list        split on commas, each entry stripped
      #   :append      collected into an array (flag may be repeated)
      #   :search_type validated against VALID_SEARCH_TYPES
      #   :category    validated against VALID_CATEGORIES
      #   :schema      parsed as JSON, inline or from "@path"
      VALUE_OPTIONS = {
        "--num-results" => [:num_results, :integer],
        "--type" => [:type, :search_type],
        "--category" => [:category, :category],
        "--include-domains" => [:include_domains, :list],
        "--exclude-domains" => [:exclude_domains, :list],
        "--api-key" => [:api_key, :string],
        "--output-format" => [:output_format, :string],
        "--start-published-date" => [:start_published_date, :string],
        "--end-published-date" => [:end_published_date, :string],
        "--start-crawl-date" => [:start_crawl_date, :string],
        "--end-crawl-date" => [:end_crawl_date, :string],
        "--include-text" => [:include_text, :append],
        "--exclude-text" => [:exclude_text, :append],
        "--text-max-characters" => [:text_max_characters, :integer],
        "--summary-query" => [:summary_query, :string],
        "--summary-schema" => [:summary_schema, :schema],
        "--context-max-characters" => [:context_max_characters, :integer],
        "--subpages" => [:subpages, :integer],
        "--subpage-target" => [:subpage_target, :append],
        "--links" => [:links, :integer],
        "--image-links" => [:image_links, :integer]
      }.freeze

      private_constant :BOOLEAN_FLAGS, :VALUE_OPTIONS

      # Parses +argv+ in one call.
      #
      # @param argv [Array<String>] command-line arguments
      # @return [Hash] parsed options (see #parse)
      # @raise [ArgumentError] see #parse
      def self.parse(argv)
        new(argv).parse
      end

      # @param argv [Array<String>] command-line arguments; not modified
      def initialize(argv)
        @argv = argv
        @args = {
          output_format: "json",
          api_key: nil
        }
      end

      # Parses the arguments given to the constructor.
      #
      # @return [Hash] always contains :output_format (default "json"),
      #   :api_key (default nil) and :query, plus one key per flag supplied
      # @raise [ArgumentError] when the query is empty, when --type or
      #   --category is not an allowed value, or when a value-taking flag is
      #   the last argument and therefore has no value
      def parse
        parse_arguments
        validate_query
        @args
      end

      private

      # Walks the argument list once, consuming flags (and their values) and
      # collecting everything else as query words.
      def parse_arguments
        pending = @argv.dup
        query_parts = []

        until pending.empty?
          arg = pending.shift
          if BOOLEAN_FLAGS.key?(arg)
            @args[BOOLEAN_FLAGS[arg]] = true
          elsif VALUE_OPTIONS.key?(arg)
            store_option(arg, take_value(arg, pending))
          else
            query_parts << arg
          end
        end

        @args[:query] = query_parts.join(" ")
      end

      # Removes and returns the value belonging to +flag+.
      #
      # Without this guard a trailing flag would hand nil to the converters,
      # crashing with NoMethodError or silently storing nil/0.
      def take_value(flag, pending)
        raise ArgumentError, "#{flag} requires a value" if pending.empty?

        pending.shift
      end

      # Converts +raw+ according to the flag's kind and records it.
      def store_option(flag, raw)
        key, kind = VALUE_OPTIONS.fetch(flag)

        case kind
        when :append
          (@args[key] ||= []) << raw
        when :integer
          # Lenient String#to_i conversion, kept so existing invocations
          # behave as before.
          @args[key] = raw.to_i
        when :list
          @args[key] = raw.split(",").map(&:strip)
        when :search_type
          validate_search_type(raw)
          @args[key] = raw
        when :category
          validate_category(raw)
          @args[key] = raw
        when :schema
          @args[key] = load_schema(raw)
        else
          @args[key] = raw
        end
      end

      # Parses a JSON schema given inline, or read from a file when the
      # argument is prefixed with "@".
      def load_schema(schema_arg)
        source = schema_arg.start_with?("@") ? File.read(schema_arg[1..]) : schema_arg
        JSON.parse(source)
      end

      def validate_query
        raise ArgumentError, "Query is required" if @args[:query].nil? || @args[:query].empty?
      end

      def validate_search_type(search_type)
        return if VALID_SEARCH_TYPES.include?(search_type)

        raise ArgumentError, "Search type must be one of: #{VALID_SEARCH_TYPES.join(', ')}"
      end

      def validate_category(category)
        return if VALID_CATEGORIES.include?(category)

        raise ArgumentError, "Category must be one of: #{VALID_CATEGORIES.map { |c| "\"#{c}\"" }.join(', ')}"
      end
    end
  end
end
data/lib/exa/client.rb CHANGED
@@ -122,32 +122,6 @@ module Exa
122
122
  Services::Context.new(connection, query: query, **params).call
123
123
  end
124
124
 
125
- # Search for LinkedIn company pages
126
- #
127
- # Convenience method that restricts search to LinkedIn company profiles
128
- # using keyword search for precise name matching.
129
- #
130
- # @param query [String] Company name to search
131
- # @param params [Hash] Additional search parameters
132
- # @option params [Integer] :numResults Number of results to return
133
- # @return [Resources::SearchResult] LinkedIn company results
134
- def linkedin_company(query, **params)
135
- search(query, type: "keyword", includeDomains: ["linkedin.com/company"], **params)
136
- end
137
-
138
- # Search for LinkedIn profiles
139
- #
140
- # Convenience method that restricts search to LinkedIn individual profiles
141
- # using keyword search for precise name matching.
142
- #
143
- # @param query [String] Person name to search
144
- # @param params [Hash] Additional search parameters
145
- # @option params [Integer] :numResults Number of results to return
146
- # @return [Resources::SearchResult] LinkedIn profile results
147
- def linkedin_person(query, **params)
148
- search(query, type: "keyword", includeDomains: ["linkedin.com/in"], **params)
149
- end
150
-
151
125
  # List all websets
152
126
  #
153
127
  # @param params [Hash] Pagination parameters
@@ -314,9 +288,12 @@ module Exa
314
288
  # List all items in a webset
315
289
  #
316
290
  # @param webset_id [String] Webset ID
317
- # @return [Array<Hash>] Array of items
318
- def list_items(webset_id:)
319
- Services::Websets::ListItems.new(connection, webset_id: webset_id).call
291
+ # @param params [Hash] Pagination parameters
292
+ # @option params [String] :cursor Cursor for pagination
293
+ # @option params [Integer] :limit Maximum number of items to return (default: 20)
294
+ # @return [Resources::WebsetItemCollection] Paginated list of items
295
+ def list_items(webset_id:, **params)
296
+ Services::Websets::ListItems.new(connection, webset_id: webset_id, **params).call
320
297
  end
321
298
 
322
299
  # List all imports
@@ -7,7 +7,7 @@ module Exa
7
7
  ENTITY_TYPES = %w[company person article research_paper custom].freeze
8
8
 
9
9
  # Valid enrichment formats
10
- ENRICHMENT_FORMATS = %w[text date number options url].freeze
10
+ ENRICHMENT_FORMATS = %w[text date number options email phone url].freeze
11
11
 
12
12
  # Valid source types for imports and exclusions
13
13
  SOURCE_TYPES = %w[import webset].freeze
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
module Exa
  module Resources
    # One page of webset items returned by the Exa API.
    #
    # Wraps the JSON response of GET /websets/v0/websets/{id}/items and carries
    # the pagination state alongside the items. Instances are frozen on
    # construction.
    class WebsetItemCollection < Struct.new(
      :data,
      :has_more,
      :next_cursor,
      keyword_init: true
    )
      # @param data [#empty?] the items on this page
      # @param has_more [Boolean] whether further pages exist
      # @param next_cursor [Object, nil] cursor for the following page
      def initialize(data:, has_more: false, next_cursor: nil)
        super(data: data, has_more: has_more, next_cursor: next_cursor)
        freeze
      end

      # @return [Boolean] true when this page holds no items
      def empty?
        data.empty?
      end

      # @return [Hash] the three attributes keyed by symbol
      def to_h
        { data: data, has_more: has_more, next_cursor: next_cursor }
      end
    end
  end
end
@@ -20,6 +20,7 @@ module Exa
20
20
  validate_exclude!(params[:exclude]) if params[:exclude]
21
21
  validate_external_id!(params[:externalId]) if params[:externalId]
22
22
  validate_metadata!(params[:metadata]) if params[:metadata]
23
+ validate_no_duplicate_ids_in_import_and_scope!(params)
23
24
  end
24
25
 
25
26
  private
@@ -184,6 +185,20 @@ module Exa
184
185
  raise ArgumentError, "#{name} must be at least #{min} characters" if min && value.length < min
185
186
  raise ArgumentError, "#{name} cannot exceed #{max} characters" if max && value.length > max
186
187
  end
188
+
189
# Rejects parameters that reference the same ID in both :import and
# search[:scope].
#
# The check is skipped unless :import, :search and search[:scope] are all
# present. Entries without an :id are ignored: they cannot name the same
# source, and comparing their nil ids would report a bogus duplicate with
# an empty ID list.
#
# @param params [Hash] parameters with symbol keys; :import and
#   search[:scope] are collections of hashes carrying an :id
# @return [nil] when no ID appears on both sides
# @raise [ArgumentError] listing the IDs that appear on both sides
def validate_no_duplicate_ids_in_import_and_scope!(params)
  return unless params[:import] && params[:search] && params[:search][:scope]

  import_ids = params[:import].map { |item| item[:id] }.compact
  scope_ids = params[:search][:scope].map { |item| item[:id] }.compact

  duplicates = import_ids & scope_ids

  return if duplicates.empty?

  raise ArgumentError,
        "Cannot use the same import/webset ID in both :import and search[:scope]: #{duplicates.join(', ')}"
end
187
202
  end
188
203
  end
189
204
  end
@@ -4,15 +4,21 @@ module Exa
4
4
  module Services
5
5
  module Websets
6
6
  class ListItems
7
- def initialize(connection, webset_id:)
7
+ def initialize(connection, webset_id:, **params)
8
8
  @connection = connection
9
9
  @webset_id = webset_id
10
+ @params = params
10
11
  end
11
12
 
12
13
  def call
13
- response = @connection.get("/websets/v0/websets/#{@webset_id}/items")
14
+ response = @connection.get("/websets/v0/websets/#{@webset_id}/items", @params)
14
15
  body = response.body
15
- body["data"] || []
16
+
17
+ Resources::WebsetItemCollection.new(
18
+ data: body["data"] || [],
19
+ has_more: body["hasMore"] || false,
20
+ next_cursor: body["nextCursor"]
21
+ )
16
22
  end
17
23
  end
18
24
  end
data/lib/exa/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Exa
4
- VERSION = "0.6.0"
4
+ VERSION = "0.7.1"
5
5
  end
data/lib/exa.rb CHANGED
@@ -17,6 +17,7 @@ require_relative "exa/resources/webset"
17
17
  require_relative "exa/resources/webset_search"
18
18
  require_relative "exa/resources/webset_enrichment"
19
19
  require_relative "exa/resources/webset_enrichment_collection"
20
+ require_relative "exa/resources/webset_item_collection"
20
21
  require_relative "exa/resources/import"
21
22
  require_relative "exa/resources/import_collection"
22
23
  require_relative "exa/resources/monitor"
@@ -67,6 +68,7 @@ require_relative "exa/cli/base"
67
68
  require_relative "exa/cli/polling"
68
69
  require_relative "exa/cli/error_handler"
69
70
  require_relative "exa/cli/formatters/search_formatter"
71
+ require_relative "exa/cli/formatters/webset_search_formatter"
70
72
  require_relative "exa/cli/formatters/context_formatter"
71
73
  require_relative "exa/cli/formatters/contents_formatter"
72
74
  require_relative "exa/cli/formatters/research_formatter"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: exa-ai
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Benjamin Jackson
@@ -135,6 +135,20 @@ dependencies:
135
135
  - - "~>"
136
136
  - !ruby/object:Gem::Version
137
137
  version: '0.9'
138
+ - !ruby/object:Gem::Dependency
139
+ name: dotenv
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '3.0'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '3.0'
138
152
  description: A Ruby gem for interacting with the Exa.ai search and discovery API
139
153
  email:
140
154
  - ben@hearmeout.co
@@ -206,7 +220,9 @@ files:
206
220
  - lib/exa/cli/formatters/search_formatter.rb
207
221
  - lib/exa/cli/formatters/webset_formatter.rb
208
222
  - lib/exa/cli/formatters/webset_item_formatter.rb
223
+ - lib/exa/cli/formatters/webset_search_formatter.rb
209
224
  - lib/exa/cli/polling.rb
225
+ - lib/exa/cli/search_parser.rb
210
226
  - lib/exa/client.rb
211
227
  - lib/exa/connection.rb
212
228
  - lib/exa/constants/websets.rb
@@ -229,6 +245,7 @@ files:
229
245
  - lib/exa/resources/webset_collection.rb
230
246
  - lib/exa/resources/webset_enrichment.rb
231
247
  - lib/exa/resources/webset_enrichment_collection.rb
248
+ - lib/exa/resources/webset_item_collection.rb
232
249
  - lib/exa/resources/webset_search.rb
233
250
  - lib/exa/services/answer.rb
234
251
  - lib/exa/services/answer_stream.rb