context.dev 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +23 -0
  3. data/README.md +1 -1
  4. data/lib/context_dev/internal/util.rb +3 -1
  5. data/lib/context_dev/models/brand_identify_from_transaction_params.rb +65 -0
  6. data/lib/context_dev/models/brand_identify_from_transaction_response.rb +190 -5
  7. data/lib/context_dev/models/brand_retrieve_by_email_params.rb +65 -0
  8. data/lib/context_dev/models/brand_retrieve_by_email_response.rb +190 -5
  9. data/lib/context_dev/models/brand_retrieve_by_isin_params.rb +65 -0
  10. data/lib/context_dev/models/brand_retrieve_by_isin_response.rb +190 -5
  11. data/lib/context_dev/models/brand_retrieve_by_name_params.rb +65 -0
  12. data/lib/context_dev/models/brand_retrieve_by_name_response.rb +190 -5
  13. data/lib/context_dev/models/brand_retrieve_by_ticker_params.rb +65 -0
  14. data/lib/context_dev/models/brand_retrieve_by_ticker_response.rb +190 -5
  15. data/lib/context_dev/models/brand_retrieve_params.rb +68 -5
  16. data/lib/context_dev/models/brand_retrieve_response.rb +190 -5
  17. data/lib/context_dev/models/web_screenshot_params.rb +17 -5
  18. data/lib/context_dev/models/web_web_crawl_md_params.rb +92 -0
  19. data/lib/context_dev/models/web_web_crawl_md_response.rb +121 -0
  20. data/lib/context_dev/models.rb +2 -0
  21. data/lib/context_dev/resources/brand.rb +1 -1
  22. data/lib/context_dev/resources/web.rb +51 -5
  23. data/lib/context_dev/version.rb +1 -1
  24. data/lib/context_dev.rb +2 -0
  25. data/rbi/context_dev/models/brand_identify_from_transaction_params.rbi +325 -0
  26. data/rbi/context_dev/models/brand_identify_from_transaction_response.rbi +859 -6
  27. data/rbi/context_dev/models/brand_retrieve_by_email_params.rbi +325 -0
  28. data/rbi/context_dev/models/brand_retrieve_by_email_response.rbi +859 -6
  29. data/rbi/context_dev/models/brand_retrieve_by_isin_params.rbi +325 -0
  30. data/rbi/context_dev/models/brand_retrieve_by_isin_response.rbi +859 -6
  31. data/rbi/context_dev/models/brand_retrieve_by_name_params.rbi +325 -0
  32. data/rbi/context_dev/models/brand_retrieve_by_name_response.rbi +859 -6
  33. data/rbi/context_dev/models/brand_retrieve_by_ticker_params.rbi +325 -0
  34. data/rbi/context_dev/models/brand_retrieve_by_ticker_response.rbi +859 -6
  35. data/rbi/context_dev/models/brand_retrieve_params.rbi +328 -6
  36. data/rbi/context_dev/models/brand_retrieve_response.rbi +859 -6
  37. data/rbi/context_dev/models/web_screenshot_params.rbi +27 -6
  38. data/rbi/context_dev/models/web_web_crawl_md_params.rbi +137 -0
  39. data/rbi/context_dev/models/web_web_crawl_md_response.rbi +230 -0
  40. data/rbi/context_dev/models.rbi +2 -0
  41. data/rbi/context_dev/resources/brand.rbi +1 -2
  42. data/rbi/context_dev/resources/web.rbi +54 -4
  43. data/sig/context_dev/models/brand_identify_from_transaction_params.rbs +131 -1
  44. data/sig/context_dev/models/brand_identify_from_transaction_response.rbs +342 -5
  45. data/sig/context_dev/models/brand_retrieve_by_email_params.rbs +131 -1
  46. data/sig/context_dev/models/brand_retrieve_by_email_response.rbs +342 -5
  47. data/sig/context_dev/models/brand_retrieve_by_isin_params.rbs +131 -1
  48. data/sig/context_dev/models/brand_retrieve_by_isin_response.rbs +342 -5
  49. data/sig/context_dev/models/brand_retrieve_by_name_params.rbs +131 -1
  50. data/sig/context_dev/models/brand_retrieve_by_name_response.rbs +342 -5
  51. data/sig/context_dev/models/brand_retrieve_by_ticker_params.rbs +131 -1
  52. data/sig/context_dev/models/brand_retrieve_by_ticker_response.rbs +342 -5
  53. data/sig/context_dev/models/brand_retrieve_params.rbs +131 -1
  54. data/sig/context_dev/models/brand_retrieve_response.rbs +342 -5
  55. data/sig/context_dev/models/web_screenshot_params.rbs +11 -2
  56. data/sig/context_dev/models/web_web_crawl_md_params.rbs +82 -0
  57. data/sig/context_dev/models/web_web_crawl_md_response.rbs +116 -0
  58. data/sig/context_dev/models.rbs +2 -0
  59. data/sig/context_dev/resources/web.rbs +15 -1
  60. metadata +8 -2
@@ -11,10 +11,22 @@ module ContextDev
11
11
  T.any(ContextDev::WebScreenshotParams, ContextDev::Internal::AnyHash)
12
12
  end
13
13
 
14
+ # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
15
+ # 'https://example.com/pricing'). When provided, the screenshot is taken of this
16
+ # exact URL.
17
+ sig { returns(T.nilable(String)) }
18
+ attr_reader :direct_url
19
+
20
+ sig { params(direct_url: String).void }
21
+ attr_writer :direct_url
22
+
14
23
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
15
24
  # domain will be automatically normalized and validated.
16
- sig { returns(String) }
17
- attr_accessor :domain
25
+ sig { returns(T.nilable(String)) }
26
+ attr_reader :domain
27
+
28
+ sig { params(domain: String).void }
29
+ attr_writer :domain
18
30
 
19
31
  # Optional parameter to determine screenshot type. If 'true', takes a full page
20
32
  # screenshot capturing all content. If 'false' or not provided, takes a viewport
@@ -37,7 +49,8 @@ module ContextDev
37
49
  # Optional parameter to specify which page type to screenshot. If provided, the
38
50
  # system will scrape the domain's links and use heuristics to find the most
39
51
  # appropriate URL for the specified page type (30 supported languages). If not
40
- # provided, screenshots the main domain landing page.
52
+ # provided, screenshots the main domain landing page. Only applicable when using
53
+ # 'domain', not 'directUrl'.
41
54
  sig do
42
55
  returns(T.nilable(ContextDev::WebScreenshotParams::Page::OrSymbol))
43
56
  end
@@ -65,6 +78,7 @@ module ContextDev
65
78
 
66
79
  sig do
67
80
  params(
81
+ direct_url: String,
68
82
  domain: String,
69
83
  full_screenshot:
70
84
  ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
@@ -74,9 +88,13 @@ module ContextDev
74
88
  ).returns(T.attached_class)
75
89
  end
76
90
  def self.new(
91
+ # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
92
+ # 'https://example.com/pricing'). When provided, the screenshot is taken of this
93
+ # exact URL.
94
+ direct_url: nil,
77
95
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
78
96
  # domain will be automatically normalized and validated.
79
- domain:,
97
+ domain: nil,
80
98
  # Optional parameter to determine screenshot type. If 'true', takes a full page
81
99
  # screenshot capturing all content. If 'false' or not provided, takes a viewport
82
100
  # screenshot (standard browser view).
@@ -84,7 +102,8 @@ module ContextDev
84
102
  # Optional parameter to specify which page type to screenshot. If provided, the
85
103
  # system will scrape the domain's links and use heuristics to find the most
86
104
  # appropriate URL for the specified page type (30 supported languages). If not
87
- # provided, screenshots the main domain landing page.
105
+ # provided, screenshots the main domain landing page. Only applicable when using
106
+ # 'domain', not 'directUrl'.
88
107
  page: nil,
89
108
  # Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
90
109
  # faster capture with basic quality. If 'quality', optimizes for higher quality
@@ -97,6 +116,7 @@ module ContextDev
97
116
  sig do
98
117
  override.returns(
99
118
  {
119
+ direct_url: String,
100
120
  domain: String,
101
121
  full_screenshot:
102
122
  ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
@@ -146,7 +166,8 @@ module ContextDev
146
166
  # Optional parameter to specify which page type to screenshot. If provided, the
147
167
  # system will scrape the domain's links and use heuristics to find the most
148
168
  # appropriate URL for the specified page type (30 supported languages). If not
149
- # provided, screenshots the main domain landing page.
169
+ # provided, screenshots the main domain landing page. Only applicable when using
170
+ # 'domain', not 'directUrl'.
150
171
  module Page
151
172
  extend ContextDev::Internal::Type::Enum
152
173
 
@@ -0,0 +1,137 @@
1
+ # typed: strong
2
+
3
+ module ContextDev
4
+ module Models
5
+ class WebWebCrawlMdParams < ContextDev::Internal::Type::BaseModel
6
+ extend ContextDev::Internal::Type::RequestParameters::Converter
7
+ include ContextDev::Internal::Type::RequestParameters
8
+
9
+ OrHash =
10
+ T.type_alias do
11
+ T.any(ContextDev::WebWebCrawlMdParams, ContextDev::Internal::AnyHash)
12
+ end
13
+
14
+ # The starting URL for the crawl (must include http:// or https:// protocol)
15
+ sig { returns(String) }
16
+ attr_accessor :url
17
+
18
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
19
+ # docs.example.com when starting from example.com). www and apex are always
20
+ # treated as equivalent.
21
+ sig { returns(T.nilable(T::Boolean)) }
22
+ attr_reader :follow_subdomains
23
+
24
+ sig { params(follow_subdomains: T::Boolean).void }
25
+ attr_writer :follow_subdomains
26
+
27
+ # Include image references in the Markdown output
28
+ sig { returns(T.nilable(T::Boolean)) }
29
+ attr_reader :include_images
30
+
31
+ sig { params(include_images: T::Boolean).void }
32
+ attr_writer :include_images
33
+
34
+ # Preserve hyperlinks in the Markdown output
35
+ sig { returns(T.nilable(T::Boolean)) }
36
+ attr_reader :include_links
37
+
38
+ sig { params(include_links: T::Boolean).void }
39
+ attr_writer :include_links
40
+
41
+ # Maximum link depth from the starting URL (0 = only the starting page)
42
+ sig { returns(T.nilable(Integer)) }
43
+ attr_reader :max_depth
44
+
45
+ sig { params(max_depth: Integer).void }
46
+ attr_writer :max_depth
47
+
48
+ # Maximum number of pages to crawl. Hard cap: 500.
49
+ sig { returns(T.nilable(Integer)) }
50
+ attr_reader :max_pages
51
+
52
+ sig { params(max_pages: Integer).void }
53
+ attr_writer :max_pages
54
+
55
+ # Truncate base64-encoded image data in the Markdown output
56
+ sig { returns(T.nilable(T::Boolean)) }
57
+ attr_reader :shorten_base64_images
58
+
59
+ sig { params(shorten_base64_images: T::Boolean).void }
60
+ attr_writer :shorten_base64_images
61
+
62
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
63
+ sig { returns(T.nilable(String)) }
64
+ attr_reader :url_regex
65
+
66
+ sig { params(url_regex: String).void }
67
+ attr_writer :url_regex
68
+
69
+ # Extract only the main content, stripping headers, footers, sidebars, and
70
+ # navigation
71
+ sig { returns(T.nilable(T::Boolean)) }
72
+ attr_reader :use_main_content_only
73
+
74
+ sig { params(use_main_content_only: T::Boolean).void }
75
+ attr_writer :use_main_content_only
76
+
77
+ sig do
78
+ params(
79
+ url: String,
80
+ follow_subdomains: T::Boolean,
81
+ include_images: T::Boolean,
82
+ include_links: T::Boolean,
83
+ max_depth: Integer,
84
+ max_pages: Integer,
85
+ shorten_base64_images: T::Boolean,
86
+ url_regex: String,
87
+ use_main_content_only: T::Boolean,
88
+ request_options: ContextDev::RequestOptions::OrHash
89
+ ).returns(T.attached_class)
90
+ end
91
+ def self.new(
92
+ # The starting URL for the crawl (must include http:// or https:// protocol)
93
+ url:,
94
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
95
+ # docs.example.com when starting from example.com). www and apex are always
96
+ # treated as equivalent.
97
+ follow_subdomains: nil,
98
+ # Include image references in the Markdown output
99
+ include_images: nil,
100
+ # Preserve hyperlinks in the Markdown output
101
+ include_links: nil,
102
+ # Maximum link depth from the starting URL (0 = only the starting page)
103
+ max_depth: nil,
104
+ # Maximum number of pages to crawl. Hard cap: 500.
105
+ max_pages: nil,
106
+ # Truncate base64-encoded image data in the Markdown output
107
+ shorten_base64_images: nil,
108
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
109
+ url_regex: nil,
110
+ # Extract only the main content, stripping headers, footers, sidebars, and
111
+ # navigation
112
+ use_main_content_only: nil,
113
+ request_options: {}
114
+ )
115
+ end
116
+
117
+ sig do
118
+ override.returns(
119
+ {
120
+ url: String,
121
+ follow_subdomains: T::Boolean,
122
+ include_images: T::Boolean,
123
+ include_links: T::Boolean,
124
+ max_depth: Integer,
125
+ max_pages: Integer,
126
+ shorten_base64_images: T::Boolean,
127
+ url_regex: String,
128
+ use_main_content_only: T::Boolean,
129
+ request_options: ContextDev::RequestOptions
130
+ }
131
+ )
132
+ end
133
+ def to_hash
134
+ end
135
+ end
136
+ end
137
+ end
@@ -0,0 +1,230 @@
1
+ # typed: strong
2
+
3
+ module ContextDev
4
+ module Models
5
+ class WebWebCrawlMdResponse < ContextDev::Internal::Type::BaseModel
6
+ OrHash =
7
+ T.type_alias do
8
+ T.any(
9
+ ContextDev::Models::WebWebCrawlMdResponse,
10
+ ContextDev::Internal::AnyHash
11
+ )
12
+ end
13
+
14
+ sig { returns(ContextDev::Models::WebWebCrawlMdResponse::Metadata) }
15
+ attr_reader :metadata
16
+
17
+ sig do
18
+ params(
19
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash
20
+ ).void
21
+ end
22
+ attr_writer :metadata
23
+
24
+ sig do
25
+ returns(T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result])
26
+ end
27
+ attr_accessor :results
28
+
29
+ sig do
30
+ params(
31
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata::OrHash,
32
+ results:
33
+ T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result::OrHash]
34
+ ).returns(T.attached_class)
35
+ end
36
+ def self.new(metadata:, results:)
37
+ end
38
+
39
+ sig do
40
+ override.returns(
41
+ {
42
+ metadata: ContextDev::Models::WebWebCrawlMdResponse::Metadata,
43
+ results: T::Array[ContextDev::Models::WebWebCrawlMdResponse::Result]
44
+ }
45
+ )
46
+ end
47
+ def to_hash
48
+ end
49
+
50
+ class Metadata < ContextDev::Internal::Type::BaseModel
51
+ OrHash =
52
+ T.type_alias do
53
+ T.any(
54
+ ContextDev::Models::WebWebCrawlMdResponse::Metadata,
55
+ ContextDev::Internal::AnyHash
56
+ )
57
+ end
58
+
59
+ # Maximum crawl depth reached during the crawl
60
+ sig { returns(Integer) }
61
+ attr_accessor :max_crawl_depth
62
+
63
+ # Number of pages that failed to crawl
64
+ sig { returns(Integer) }
65
+ attr_accessor :num_failed
66
+
67
+ # Number of pages successfully crawled
68
+ sig { returns(Integer) }
69
+ attr_accessor :num_succeeded
70
+
71
+ # Total number of URLs crawled
72
+ sig { returns(Integer) }
73
+ attr_accessor :num_urls
74
+
75
+ sig do
76
+ params(
77
+ max_crawl_depth: Integer,
78
+ num_failed: Integer,
79
+ num_succeeded: Integer,
80
+ num_urls: Integer
81
+ ).returns(T.attached_class)
82
+ end
83
+ def self.new(
84
+ # Maximum crawl depth reached during the crawl
85
+ max_crawl_depth:,
86
+ # Number of pages that failed to crawl
87
+ num_failed:,
88
+ # Number of pages successfully crawled
89
+ num_succeeded:,
90
+ # Total number of URLs crawled
91
+ num_urls:
92
+ )
93
+ end
94
+
95
+ sig do
96
+ override.returns(
97
+ {
98
+ max_crawl_depth: Integer,
99
+ num_failed: Integer,
100
+ num_succeeded: Integer,
101
+ num_urls: Integer
102
+ }
103
+ )
104
+ end
105
+ def to_hash
106
+ end
107
+ end
108
+
109
+ class Result < ContextDev::Internal::Type::BaseModel
110
+ OrHash =
111
+ T.type_alias do
112
+ T.any(
113
+ ContextDev::Models::WebWebCrawlMdResponse::Result,
114
+ ContextDev::Internal::AnyHash
115
+ )
116
+ end
117
+
118
+ # Extracted page content as Markdown (empty string on failure)
119
+ sig { returns(String) }
120
+ attr_accessor :markdown
121
+
122
+ sig do
123
+ returns(ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata)
124
+ end
125
+ attr_reader :metadata
126
+
127
+ sig do
128
+ params(
129
+ metadata:
130
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
131
+ ).void
132
+ end
133
+ attr_writer :metadata
134
+
135
+ sig do
136
+ params(
137
+ markdown: String,
138
+ metadata:
139
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata::OrHash
140
+ ).returns(T.attached_class)
141
+ end
142
+ def self.new(
143
+ # Extracted page content as Markdown (empty string on failure)
144
+ markdown:,
145
+ metadata:
146
+ )
147
+ end
148
+
149
+ sig do
150
+ override.returns(
151
+ {
152
+ markdown: String,
153
+ metadata:
154
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata
155
+ }
156
+ )
157
+ end
158
+ def to_hash
159
+ end
160
+
161
+ class Metadata < ContextDev::Internal::Type::BaseModel
162
+ OrHash =
163
+ T.type_alias do
164
+ T.any(
165
+ ContextDev::Models::WebWebCrawlMdResponse::Result::Metadata,
166
+ ContextDev::Internal::AnyHash
167
+ )
168
+ end
169
+
170
+ # Depth relative to the start URL. 0 = start URL, 1 = one link away.
171
+ sig { returns(Integer) }
172
+ attr_accessor :crawl_depth
173
+
174
+ # HTTP status code of the response
175
+ sig { returns(Integer) }
176
+ attr_accessor :status_code
177
+
178
+ # true if the page was fetched and parsed successfully
179
+ sig { returns(T::Boolean) }
180
+ attr_accessor :success
181
+
182
+ # The page's <title> content (empty string if unavailable)
183
+ sig { returns(String) }
184
+ attr_accessor :title
185
+
186
+ # The URL that was fetched
187
+ sig { returns(String) }
188
+ attr_accessor :url
189
+
190
+ sig do
191
+ params(
192
+ crawl_depth: Integer,
193
+ status_code: Integer,
194
+ success: T::Boolean,
195
+ title: String,
196
+ url: String
197
+ ).returns(T.attached_class)
198
+ end
199
+ def self.new(
200
+ # Depth relative to the start URL. 0 = start URL, 1 = one link away.
201
+ crawl_depth:,
202
+ # HTTP status code of the response
203
+ status_code:,
204
+ # true if the page was fetched and parsed successfully
205
+ success:,
206
+ # The page's <title> content (empty string if unavailable)
207
+ title:,
208
+ # The URL that was fetched
209
+ url:
210
+ )
211
+ end
212
+
213
+ sig do
214
+ override.returns(
215
+ {
216
+ crawl_depth: Integer,
217
+ status_code: Integer,
218
+ success: T::Boolean,
219
+ title: String,
220
+ url: String
221
+ }
222
+ )
223
+ end
224
+ def to_hash
225
+ end
226
+ end
227
+ end
228
+ end
229
+ end
230
+ end
@@ -37,6 +37,8 @@ module ContextDev
37
37
 
38
38
  WebScreenshotParams = ContextDev::Models::WebScreenshotParams
39
39
 
40
+ WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
41
+
40
42
  WebWebScrapeHTMLParams = ContextDev::Models::WebWebScrapeHTMLParams
41
43
 
42
44
  WebWebScrapeImagesParams = ContextDev::Models::WebWebScrapeImagesParams
@@ -19,8 +19,7 @@ module ContextDev
19
19
  # Domain name to retrieve brand data for (e.g., 'example.com', 'google.com').
20
20
  # Cannot be used with name or ticker parameters.
21
21
  domain:,
22
- # Optional parameter to force the language of the retrieved brand data. Works with
23
- # all three lookup methods.
22
+ # Optional parameter to force the language of the retrieved brand data.
24
23
  force_language: nil,
25
24
  # Optional parameter to optimize the API call for maximum speed. When set to true,
26
25
  # the API will skip time-consuming operations for faster response at the cost of
@@ -5,10 +5,12 @@ module ContextDev
5
5
  class Web
6
6
  # Capture a screenshot of a website. Supports both viewport (standard browser
7
7
  # view) and full-page screenshots. Can also screenshot specific page types (login,
8
- # pricing, etc.) by using heuristics to find the appropriate URL. Returns a URL to
9
- # the uploaded screenshot image hosted on our CDN.
8
+ # pricing, etc.) by using heuristics to find the appropriate URL. Either 'domain'
9
+ # or 'directUrl' must be provided as a query parameter, but not both. Returns a
10
+ # URL to the uploaded screenshot image hosted on our CDN.
10
11
  sig do
11
12
  params(
13
+ direct_url: String,
12
14
  domain: String,
13
15
  full_screenshot:
14
16
  ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
@@ -18,9 +20,13 @@ module ContextDev
18
20
  ).returns(ContextDev::Models::WebScreenshotResponse)
19
21
  end
20
22
  def screenshot(
23
+ # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
24
+ # 'https://example.com/pricing'). When provided, the screenshot is taken of this
25
+ # exact URL.
26
+ direct_url: nil,
21
27
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
22
28
  # domain will be automatically normalized and validated.
23
- domain:,
29
+ domain: nil,
24
30
  # Optional parameter to determine screenshot type. If 'true', takes a full page
25
31
  # screenshot capturing all content. If 'false' or not provided, takes a viewport
26
32
  # screenshot (standard browser view).
@@ -28,7 +34,8 @@ module ContextDev
28
34
  # Optional parameter to specify which page type to screenshot. If provided, the
29
35
  # system will scrape the domain's links and use heuristics to find the most
30
36
  # appropriate URL for the specified page type (30 supported languages). If not
31
- # provided, screenshots the main domain landing page.
37
+ # provided, screenshots the main domain landing page. Only applicable when using
38
+ # 'domain', not 'directUrl'.
32
39
  page: nil,
33
40
  # Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
34
41
  # faster capture with basic quality. If 'quality', optimizes for higher quality
@@ -38,6 +45,49 @@ module ContextDev
38
45
  )
39
46
  end
40
47
 
48
+ # Performs a crawl starting from a given URL, extracts page content as Markdown,
49
+ # and returns results for all crawled pages. Only follows links within the same
50
+ # domain as the starting URL. Costs 1 credit per successful page crawled.
51
+ sig do
52
+ params(
53
+ url: String,
54
+ follow_subdomains: T::Boolean,
55
+ include_images: T::Boolean,
56
+ include_links: T::Boolean,
57
+ max_depth: Integer,
58
+ max_pages: Integer,
59
+ shorten_base64_images: T::Boolean,
60
+ url_regex: String,
61
+ use_main_content_only: T::Boolean,
62
+ request_options: ContextDev::RequestOptions::OrHash
63
+ ).returns(ContextDev::Models::WebWebCrawlMdResponse)
64
+ end
65
+ def web_crawl_md(
66
+ # The starting URL for the crawl (must include http:// or https:// protocol)
67
+ url:,
68
+ # When true, follow links on subdomains of the starting URL's domain (e.g.
69
+ # docs.example.com when starting from example.com). www and apex are always
70
+ # treated as equivalent.
71
+ follow_subdomains: nil,
72
+ # Include image references in the Markdown output
73
+ include_images: nil,
74
+ # Preserve hyperlinks in the Markdown output
75
+ include_links: nil,
76
+ # Maximum link depth from the starting URL (0 = only the starting page)
77
+ max_depth: nil,
78
+ # Maximum number of pages to crawl. Hard cap: 500.
79
+ max_pages: nil,
80
+ # Truncate base64-encoded image data in the Markdown output
81
+ shorten_base64_images: nil,
82
+ # Regex pattern. Only URLs matching this pattern will be followed and scraped.
83
+ url_regex: nil,
84
+ # Extract only the main content, stripping headers, footers, sidebars, and
85
+ # navigation
86
+ use_main_content_only: nil,
87
+ request_options: {}
88
+ )
89
+ end
90
+
41
91
  # Scrapes the given URL and returns the raw HTML content of the page.
42
92
  sig do
43
93
  params(