context.dev 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +37 -0
  3. data/README.md +1 -1
  4. data/lib/context_dev/client.rb +0 -4
  5. data/lib/context_dev/internal/type/base_model.rb +3 -3
  6. data/lib/context_dev/models/brand_identify_from_transaction_params.rb +0 -1
  7. data/lib/context_dev/models/brand_retrieve_by_name_params.rb +5 -5
  8. data/lib/context_dev/models/industry_retrieve_naics_params.rb +4 -4
  9. data/lib/context_dev/models/{style_extract_fonts_params.rb → web_extract_fonts_params.rb} +18 -7
  10. data/lib/context_dev/models/web_extract_fonts_response.rb +168 -0
  11. data/lib/context_dev/models/{style_extract_styleguide_params.rb → web_extract_styleguide_params.rb} +8 -5
  12. data/lib/context_dev/models/{style_extract_styleguide_response.rb → web_extract_styleguide_response.rb} +147 -80
  13. data/lib/context_dev/models/web_screenshot_params.rb +3 -2
  14. data/lib/context_dev/models/web_web_crawl_md_params.rb +11 -1
  15. data/lib/context_dev/models/web_web_scrape_html_params.rb +14 -1
  16. data/lib/context_dev/models/web_web_scrape_md_params.rb +13 -3
  17. data/lib/context_dev/models/web_web_scrape_sitemap_params.rb +12 -4
  18. data/lib/context_dev/models.rb +4 -4
  19. data/lib/context_dev/resources/brand.rb +8 -12
  20. data/lib/context_dev/resources/industry.rb +3 -3
  21. data/lib/context_dev/resources/web.rb +87 -21
  22. data/lib/context_dev/version.rb +1 -1
  23. data/lib/context_dev.rb +4 -5
  24. data/rbi/context_dev/client.rbi +0 -3
  25. data/rbi/context_dev/models/brand_identify_from_transaction_params.rbi +0 -2
  26. data/rbi/context_dev/models/brand_retrieve_by_name_params.rbi +6 -6
  27. data/rbi/context_dev/models/industry_retrieve_naics_params.rbi +6 -6
  28. data/rbi/context_dev/models/{style_extract_fonts_params.rbi → web_extract_fonts_params.rbi} +27 -7
  29. data/rbi/context_dev/models/web_extract_fonts_response.rbi +301 -0
  30. data/rbi/context_dev/models/{style_extract_styleguide_params.rbi → web_extract_styleguide_params.rbi} +12 -6
  31. data/rbi/context_dev/models/{style_extract_styleguide_response.rbi → web_extract_styleguide_response.rbi} +232 -94
  32. data/rbi/context_dev/models/web_screenshot_params.rbi +6 -4
  33. data/rbi/context_dev/models/web_web_crawl_md_params.rbi +15 -0
  34. data/rbi/context_dev/models/web_web_scrape_html_params.rbi +19 -1
  35. data/rbi/context_dev/models/web_web_scrape_md_params.rbi +17 -2
  36. data/rbi/context_dev/models/web_web_scrape_sitemap_params.rbi +15 -4
  37. data/rbi/context_dev/models.rbi +4 -5
  38. data/rbi/context_dev/resources/brand.rbi +9 -14
  39. data/rbi/context_dev/resources/industry.rbi +4 -4
  40. data/rbi/context_dev/resources/web.rbi +83 -17
  41. data/sig/context_dev/client.rbs +0 -2
  42. data/sig/context_dev/models/{style_extract_styleguide_params.rbs → web_extract_fonts_params.rbs} +2 -2
  43. data/sig/context_dev/models/web_extract_fonts_response.rbs +138 -0
  44. data/sig/context_dev/models/{style_extract_fonts_params.rbs → web_extract_styleguide_params.rbs} +13 -5
  45. data/sig/context_dev/models/{style_extract_styleguide_response.rbs → web_extract_styleguide_response.rbs} +147 -95
  46. data/sig/context_dev/models/web_web_crawl_md_params.rbs +7 -0
  47. data/sig/context_dev/models/web_web_scrape_html_params.rbs +8 -1
  48. data/sig/context_dev/models/web_web_scrape_md_params.rbs +7 -0
  49. data/sig/context_dev/models/web_web_scrape_sitemap_params.rbs +7 -1
  50. data/sig/context_dev/models.rbs +4 -4
  51. data/sig/context_dev/resources/web.rbs +18 -0
  52. metadata +14 -17
  53. data/lib/context_dev/models/style_extract_fonts_response.rb +0 -101
  54. data/lib/context_dev/resources/style.rb +0 -76
  55. data/rbi/context_dev/models/style_extract_fonts_response.rbi +0 -153
  56. data/rbi/context_dev/resources/style.rbi +0 -60
  57. data/sig/context_dev/models/style_extract_fonts_response.rbs +0 -82
  58. data/sig/context_dev/resources/style.rbs +0 -20
@@ -13,7 +13,7 @@ module ContextDev
13
13
 
14
14
  # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
15
15
  # 'https://example.com/pricing'). When provided, the screenshot is taken of this
16
- # exact URL.
16
+ # exact URL. You must provide either 'domain' or 'directUrl', but not both.
17
17
  sig { returns(T.nilable(String)) }
18
18
  attr_reader :direct_url
19
19
 
@@ -21,7 +21,8 @@ module ContextDev
21
21
  attr_writer :direct_url
22
22
 
23
23
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
24
- # domain will be automatically normalized and validated.
24
+ # domain will be automatically normalized and validated. You must provide either
25
+ # 'domain' or 'directUrl', but not both.
25
26
  sig { returns(T.nilable(String)) }
26
27
  attr_reader :domain
27
28
 
@@ -90,10 +91,11 @@ module ContextDev
90
91
  def self.new(
91
92
  # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
92
93
  # 'https://example.com/pricing'). When provided, the screenshot is taken of this
93
- # exact URL.
94
+ # exact URL. You must provide either 'domain' or 'directUrl', but not both.
94
95
  direct_url: nil,
95
96
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
96
- # domain will be automatically normalized and validated.
97
+ # domain will be automatically normalized and validated. You must provide either
98
+ # 'domain' or 'directUrl', but not both.
97
99
  domain: nil,
98
100
  # Optional parameter to determine screenshot type. If 'true', takes a full page
99
101
  # screenshot capturing all content. If 'false' or not provided, takes a viewport
@@ -38,6 +38,15 @@ module ContextDev
38
38
  sig { params(include_links: T::Boolean).void }
39
39
  attr_writer :include_links
40
40
 
41
+ # Return a cached result if a prior scrape for the same parameters exists and is
42
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
43
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
44
+ sig { returns(T.nilable(Integer)) }
45
+ attr_reader :max_age_ms
46
+
47
+ sig { params(max_age_ms: Integer).void }
48
+ attr_writer :max_age_ms
49
+
41
50
  # Maximum link depth from the starting URL (0 = only the starting page)
42
51
  sig { returns(T.nilable(Integer)) }
43
52
  attr_reader :max_depth
@@ -80,6 +89,7 @@ module ContextDev
80
89
  follow_subdomains: T::Boolean,
81
90
  include_images: T::Boolean,
82
91
  include_links: T::Boolean,
92
+ max_age_ms: Integer,
83
93
  max_depth: Integer,
84
94
  max_pages: Integer,
85
95
  shorten_base64_images: T::Boolean,
@@ -99,6 +109,10 @@ module ContextDev
99
109
  include_images: nil,
100
110
  # Preserve hyperlinks in the Markdown output
101
111
  include_links: nil,
112
+ # Return a cached result if a prior scrape for the same parameters exists and is
113
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
114
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
115
+ max_age_ms: nil,
102
116
  # Maximum link depth from the starting URL (0 = only the starting page)
103
117
  max_depth: nil,
104
118
  # Maximum number of pages to crawl. Hard cap: 500.
@@ -121,6 +135,7 @@ module ContextDev
121
135
  follow_subdomains: T::Boolean,
122
136
  include_images: T::Boolean,
123
137
  include_links: T::Boolean,
138
+ max_age_ms: Integer,
124
139
  max_depth: Integer,
125
140
  max_pages: Integer,
126
141
  shorten_base64_images: T::Boolean,
@@ -18,22 +18,40 @@ module ContextDev
18
18
  sig { returns(String) }
19
19
  attr_accessor :url
20
20
 
21
+ # Return a cached result if a prior scrape for the same parameters exists and is
22
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
23
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
24
+ sig { returns(T.nilable(Integer)) }
25
+ attr_reader :max_age_ms
26
+
27
+ sig { params(max_age_ms: Integer).void }
28
+ attr_writer :max_age_ms
29
+
21
30
  sig do
22
31
  params(
23
32
  url: String,
33
+ max_age_ms: Integer,
24
34
  request_options: ContextDev::RequestOptions::OrHash
25
35
  ).returns(T.attached_class)
26
36
  end
27
37
  def self.new(
28
38
  # Full URL to scrape (must include http:// or https:// protocol)
29
39
  url:,
40
+ # Return a cached result if a prior scrape for the same parameters exists and is
41
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
42
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
43
+ max_age_ms: nil,
30
44
  request_options: {}
31
45
  )
32
46
  end
33
47
 
34
48
  sig do
35
49
  override.returns(
36
- { url: String, request_options: ContextDev::RequestOptions }
50
+ {
51
+ url: String,
52
+ max_age_ms: Integer,
53
+ request_options: ContextDev::RequestOptions
54
+ }
37
55
  )
38
56
  end
39
57
  def to_hash
@@ -11,7 +11,7 @@ module ContextDev
11
11
  T.any(ContextDev::WebWebScrapeMdParams, ContextDev::Internal::AnyHash)
12
12
  end
13
13
 
14
- # Full URL to scrape and convert to markdown (must include http:// or https://
14
+ # Full URL to scrape into LLM usable Markdown (must include http:// or https://
15
15
  # protocol)
16
16
  sig { returns(String) }
17
17
  attr_accessor :url
@@ -30,6 +30,15 @@ module ContextDev
30
30
  sig { params(include_links: T::Boolean).void }
31
31
  attr_writer :include_links
32
32
 
33
+ # Return a cached result if a prior scrape for the same parameters exists and is
34
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
35
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
36
+ sig { returns(T.nilable(Integer)) }
37
+ attr_reader :max_age_ms
38
+
39
+ sig { params(max_age_ms: Integer).void }
40
+ attr_writer :max_age_ms
41
+
33
42
  # Shorten base64-encoded image data in the Markdown output
34
43
  sig { returns(T.nilable(T::Boolean)) }
35
44
  attr_reader :shorten_base64_images
@@ -50,19 +59,24 @@ module ContextDev
50
59
  url: String,
51
60
  include_images: T::Boolean,
52
61
  include_links: T::Boolean,
62
+ max_age_ms: Integer,
53
63
  shorten_base64_images: T::Boolean,
54
64
  use_main_content_only: T::Boolean,
55
65
  request_options: ContextDev::RequestOptions::OrHash
56
66
  ).returns(T.attached_class)
57
67
  end
58
68
  def self.new(
59
- # Full URL to scrape and convert to markdown (must include http:// or https://
69
+ # Full URL to scrape into LLM usable Markdown (must include http:// or https://
60
70
  # protocol)
61
71
  url:,
62
72
  # Include image references in Markdown output
63
73
  include_images: nil,
64
74
  # Preserve hyperlinks in Markdown output
65
75
  include_links: nil,
76
+ # Return a cached result if a prior scrape for the same parameters exists and is
77
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
78
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
79
+ max_age_ms: nil,
66
80
  # Shorten base64-encoded image data in the Markdown output
67
81
  shorten_base64_images: nil,
68
82
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -78,6 +92,7 @@ module ContextDev
78
92
  url: String,
79
93
  include_images: T::Boolean,
80
94
  include_links: T::Boolean,
95
+ max_age_ms: Integer,
81
96
  shorten_base64_images: T::Boolean,
82
97
  use_main_content_only: T::Boolean,
83
98
  request_options: ContextDev::RequestOptions
@@ -14,8 +14,7 @@ module ContextDev
14
14
  )
15
15
  end
16
16
 
17
- # Domain name to crawl sitemaps for (e.g., 'example.com'). The domain will be
18
- # automatically normalized and validated.
17
+ # Domain to build a sitemap for
19
18
  sig { returns(String) }
20
19
  attr_accessor :domain
21
20
 
@@ -27,20 +26,31 @@ module ContextDev
27
26
  sig { params(max_links: Integer).void }
28
27
  attr_writer :max_links
29
28
 
29
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
30
+ # returned and counted against maxLinks.
31
+ sig { returns(T.nilable(String)) }
32
+ attr_reader :url_regex
33
+
34
+ sig { params(url_regex: String).void }
35
+ attr_writer :url_regex
36
+
30
37
  sig do
31
38
  params(
32
39
  domain: String,
33
40
  max_links: Integer,
41
+ url_regex: String,
34
42
  request_options: ContextDev::RequestOptions::OrHash
35
43
  ).returns(T.attached_class)
36
44
  end
37
45
  def self.new(
38
- # Domain name to crawl sitemaps for (e.g., 'example.com'). The domain will be
39
- # automatically normalized and validated.
46
+ # Domain to build a sitemap for
40
47
  domain:,
41
48
  # Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
42
49
  # Minimum is 1, maximum is 100,000.
43
50
  max_links: nil,
51
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
52
+ # returned and counted against maxLinks.
53
+ url_regex: nil,
44
54
  request_options: {}
45
55
  )
46
56
  end
@@ -50,6 +60,7 @@ module ContextDev
50
60
  {
51
61
  domain: String,
52
62
  max_links: Integer,
63
+ url_regex: String,
53
64
  request_options: ContextDev::RequestOptions
54
65
  }
55
66
  )
@@ -25,16 +25,15 @@ module ContextDev
25
25
 
26
26
  IndustryRetrieveNaicsParams = ContextDev::Models::IndustryRetrieveNaicsParams
27
27
 
28
- StyleExtractFontsParams = ContextDev::Models::StyleExtractFontsParams
29
-
30
- StyleExtractStyleguideParams =
31
- ContextDev::Models::StyleExtractStyleguideParams
32
-
33
28
  UtilityPrefetchByEmailParams =
34
29
  ContextDev::Models::UtilityPrefetchByEmailParams
35
30
 
36
31
  UtilityPrefetchParams = ContextDev::Models::UtilityPrefetchParams
37
32
 
33
+ WebExtractFontsParams = ContextDev::Models::WebExtractFontsParams
34
+
35
+ WebExtractStyleguideParams = ContextDev::Models::WebExtractStyleguideParams
36
+
38
37
  WebScreenshotParams = ContextDev::Models::WebScreenshotParams
39
38
 
40
39
  WebWebCrawlMdParams = ContextDev::Models::WebWebCrawlMdParams
@@ -63,7 +63,6 @@ module ContextDev
63
63
  force_language: nil,
64
64
  # When set to true, the API will perform an additional verification steps to
65
65
  # ensure the identified brand matches the transaction with high confidence.
66
- # Defaults to false.
67
66
  high_confidence_only: nil,
68
67
  # Optional parameter to optimize the API call for maximum speed. When set to true,
69
68
  # the API will skip time-consuming operations for faster response at the cost of
@@ -83,9 +82,8 @@ module ContextDev
83
82
  end
84
83
 
85
84
  # Retrieve brand information using an email address while detecting disposable and
86
- # free email addresses. This endpoint extracts the domain from the email address
87
- # and returns brand data for that domain. Disposable and free email addresses
88
- # (like gmail.com, yahoo.com) will throw a 422 error.
85
+ # free email addresses. Disposable and free email addresses (like gmail.com,
86
+ # yahoo.com) will throw a 422 error.
89
87
  sig do
90
88
  params(
91
89
  email: String,
@@ -116,8 +114,7 @@ module ContextDev
116
114
  end
117
115
 
118
116
  # Retrieve brand information using an ISIN (International Securities
119
- # Identification Number). This endpoint looks up the company associated with the
120
- # ISIN and returns its brand data.
117
+ # Identification Number).
121
118
  sig do
122
119
  params(
123
120
  isin: String,
@@ -147,8 +144,7 @@ module ContextDev
147
144
  )
148
145
  end
149
146
 
150
- # Retrieve brand information using a company name. This endpoint searches for the
151
- # company by name and returns its brand data.
147
+ # Retrieve brand information using a company name.
152
148
  sig do
153
149
  params(
154
150
  name: String,
@@ -165,8 +161,8 @@ module ContextDev
165
161
  # Company name to retrieve brand data for (e.g., 'Apple Inc', 'Microsoft
166
162
  # Corporation'). Must be 3-30 characters.
167
163
  name:,
168
- # Optional country code (GL parameter) to specify the country. This affects the
169
- # geographic location used for search queries.
164
+ # Optional country code hint (GL parameter) to specify the country for the company
165
+ # name.
170
166
  country_gl: nil,
171
167
  # Optional parameter to force the language of the retrieved brand data.
172
168
  force_language: nil,
@@ -182,8 +178,7 @@ module ContextDev
182
178
  )
183
179
  end
184
180
 
185
- # Retrieve brand information using a stock ticker symbol. This endpoint looks up
186
- # the company associated with the ticker and returns its brand data.
181
+ # Retrieve brand information using a stock ticker symbol.
187
182
  sig do
188
183
  params(
189
184
  ticker: String,
@@ -217,8 +212,8 @@ module ContextDev
217
212
  end
218
213
 
219
214
  # Returns a simplified version of brand data containing only essential
220
- # information: domain, title, colors, logos, and backdrops. This endpoint is
221
- # optimized for faster responses and reduced data transfer.
215
+ # information: domain, title, colors, logos, and backdrops. Optimized for faster
216
+ # responses and reduced data transfer.
222
217
  sig do
223
218
  params(
224
219
  domain: String,
@@ -3,7 +3,7 @@
3
3
  module ContextDev
4
4
  module Resources
5
5
  class Industry
6
- # Endpoint to classify any brand into a 2022 NAICS code.
6
+ # Classify any brand into 2022 NAICS industry codes from its domain or name.
7
7
  sig do
8
8
  params(
9
9
  input: String,
@@ -14,9 +14,9 @@ module ContextDev
14
14
  ).returns(ContextDev::Models::IndustryRetrieveNaicsResponse)
15
15
  end
16
16
  def retrieve_naics(
17
- # Brand domain or title to retrieve NAICS code for. If a valid domain is provided
18
- # in `input`, it will be used for classification, otherwise, we will search for
19
- # the brand using the provided title.
17
+ # Brand domain or title to retrieve NAICS code for. If a valid domain is provided,
18
+ # it will be used for classification, otherwise, we will search for the brand
19
+ # using the provided title.
20
20
  input:,
21
21
  # Maximum number of NAICS codes to return. Must be between 1 and 10. Defaults
22
22
  # to 5.
@@ -3,11 +3,62 @@
3
3
  module ContextDev
4
4
  module Resources
5
5
  class Web
6
- # Capture a screenshot of a website. Supports both viewport (standard browser
7
- # view) and full-page screenshots. Can also screenshot specific page types (login,
8
- # pricing, etc.) by using heuristics to find the appropriate URL. Either 'domain'
9
- # or 'directUrl' must be provided as a query parameter, but not both. Returns a
10
- # URL to the uploaded screenshot image hosted on our CDN.
6
+ # Scrape font information from a website including font families, usage
7
+ # statistics, fallbacks, and element/word counts.
8
+ sig do
9
+ params(
10
+ direct_url: String,
11
+ domain: String,
12
+ timeout_ms: Integer,
13
+ request_options: ContextDev::RequestOptions::OrHash
14
+ ).returns(ContextDev::Models::WebExtractFontsResponse)
15
+ end
16
+ def extract_fonts(
17
+ # A specific URL to fetch fonts from directly, bypassing domain resolution (e.g.,
18
+ # 'https://example.com/design-system'). When provided, fonts are extracted from
19
+ # this exact URL. You must provide either 'domain' or 'directUrl', but not both.
20
+ direct_url: nil,
21
+ # Domain name to extract fonts from (e.g., 'example.com', 'google.com'). The
22
+ # domain will be automatically normalized and validated. You must provide either
23
+ # 'domain' or 'directUrl', but not both.
24
+ domain: nil,
25
+ # Optional timeout in milliseconds for the request. If the request takes longer
26
+ # than this value, it will be aborted with a 408 status code. Maximum allowed
27
+ # value is 300000ms (5 minutes).
28
+ timeout_ms: nil,
29
+ request_options: {}
30
+ )
31
+ end
32
+
33
+ # Extract a comprehensive design system from a website including colors,
34
+ # typography, spacing, shadows, and UI components.
35
+ sig do
36
+ params(
37
+ direct_url: String,
38
+ domain: String,
39
+ timeout_ms: Integer,
40
+ request_options: ContextDev::RequestOptions::OrHash
41
+ ).returns(ContextDev::Models::WebExtractStyleguideResponse)
42
+ end
43
+ def extract_styleguide(
44
+ # A specific URL to fetch the styleguide from directly, bypassing domain
45
+ # resolution (e.g., 'https://example.com/design-system'). When provided, the
46
+ # styleguide is extracted from this exact URL. You must provide either 'domain' or
47
+ # 'directUrl', but not both.
48
+ direct_url: nil,
49
+ # Domain name to extract styleguide from (e.g., 'example.com', 'google.com'). The
50
+ # domain will be automatically normalized and validated. You must provide either
51
+ # 'domain' or 'directUrl', but not both.
52
+ domain: nil,
53
+ # Optional timeout in milliseconds for the request. If the request takes longer
54
+ # than this value, it will be aborted with a 408 status code. Maximum allowed
55
+ # value is 300000ms (5 minutes).
56
+ timeout_ms: nil,
57
+ request_options: {}
58
+ )
59
+ end
60
+
61
+ # Capture a screenshot of a website.
11
62
  sig do
12
63
  params(
13
64
  direct_url: String,
@@ -22,10 +73,11 @@ module ContextDev
22
73
  def screenshot(
23
74
  # A specific URL to screenshot directly, bypassing domain resolution (e.g.,
24
75
  # 'https://example.com/pricing'). When provided, the screenshot is taken of this
25
- # exact URL.
76
+ # exact URL. You must provide either 'domain' or 'directUrl', but not both.
26
77
  direct_url: nil,
27
78
  # Domain name to take screenshot of (e.g., 'example.com', 'google.com'). The
28
- # domain will be automatically normalized and validated.
79
+ # domain will be automatically normalized and validated. You must provide either
80
+ # 'domain' or 'directUrl', but not both.
29
81
  domain: nil,
30
82
  # Optional parameter to determine screenshot type. If 'true', takes a full page
31
83
  # screenshot capturing all content. If 'false' or not provided, takes a viewport
@@ -46,14 +98,14 @@ module ContextDev
46
98
  end
47
99
 
48
100
  # Performs a crawl starting from a given URL, extracts page content as Markdown,
49
- # and returns results for all crawled pages. Only follows links within the same
50
- # domain as the starting URL. Costs 1 credit per successful page crawled.
101
+ # and returns results for all crawled pages.
51
102
  sig do
52
103
  params(
53
104
  url: String,
54
105
  follow_subdomains: T::Boolean,
55
106
  include_images: T::Boolean,
56
107
  include_links: T::Boolean,
108
+ max_age_ms: Integer,
57
109
  max_depth: Integer,
58
110
  max_pages: Integer,
59
111
  shorten_base64_images: T::Boolean,
@@ -73,6 +125,10 @@ module ContextDev
73
125
  include_images: nil,
74
126
  # Preserve hyperlinks in the Markdown output
75
127
  include_links: nil,
128
+ # Return a cached result if a prior scrape for the same parameters exists and is
129
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
130
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
131
+ max_age_ms: nil,
76
132
  # Maximum link depth from the starting URL (0 = only the starting page)
77
133
  max_depth: nil,
78
134
  # Maximum number of pages to crawl. Hard cap: 500.
@@ -92,12 +148,17 @@ module ContextDev
92
148
  sig do
93
149
  params(
94
150
  url: String,
151
+ max_age_ms: Integer,
95
152
  request_options: ContextDev::RequestOptions::OrHash
96
153
  ).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
97
154
  end
98
155
  def web_scrape_html(
99
156
  # Full URL to scrape (must include http:// or https:// protocol)
100
157
  url:,
158
+ # Return a cached result if a prior scrape for the same parameters exists and is
159
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
160
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
161
+ max_age_ms: nil,
101
162
  request_options: {}
102
163
  )
103
164
  end
@@ -118,26 +179,30 @@ module ContextDev
118
179
  )
119
180
  end
120
181
 
121
- # Scrapes the given URL, converts the HTML content to Markdown, and returns the
122
- # result.
182
+ # Scrapes the given URL into LLM usable Markdown.
123
183
  sig do
124
184
  params(
125
185
  url: String,
126
186
  include_images: T::Boolean,
127
187
  include_links: T::Boolean,
188
+ max_age_ms: Integer,
128
189
  shorten_base64_images: T::Boolean,
129
190
  use_main_content_only: T::Boolean,
130
191
  request_options: ContextDev::RequestOptions::OrHash
131
192
  ).returns(ContextDev::Models::WebWebScrapeMdResponse)
132
193
  end
133
194
  def web_scrape_md(
134
- # Full URL to scrape and convert to markdown (must include http:// or https://
195
+ # Full URL to scrape into LLM usable Markdown (must include http:// or https://
135
196
  # protocol)
136
197
  url:,
137
198
  # Include image references in Markdown output
138
199
  include_images: nil,
139
200
  # Preserve hyperlinks in Markdown output
140
201
  include_links: nil,
202
+ # Return a cached result if a prior scrape for the same parameters exists and is
203
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
204
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
205
+ max_age_ms: nil,
141
206
  # Shorten base64-encoded image data in the Markdown output
142
207
  shorten_base64_images: nil,
143
208
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -147,23 +212,24 @@ module ContextDev
147
212
  )
148
213
  end
149
214
 
150
- # Crawls the sitemap of the given domain and returns all discovered page URLs.
151
- # Supports sitemap index files (recursive), parallel fetching with concurrency
152
- # control, deduplication, and filters out non-page resources (images, PDFs, etc.).
215
+ # Crawl an entire website's sitemap and return all discovered page URLs.
153
216
  sig do
154
217
  params(
155
218
  domain: String,
156
219
  max_links: Integer,
220
+ url_regex: String,
157
221
  request_options: ContextDev::RequestOptions::OrHash
158
222
  ).returns(ContextDev::Models::WebWebScrapeSitemapResponse)
159
223
  end
160
224
  def web_scrape_sitemap(
161
- # Domain name to crawl sitemaps for (e.g., 'example.com'). The domain will be
162
- # automatically normalized and validated.
225
+ # Domain to build a sitemap for
163
226
  domain:,
164
227
  # Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
165
228
  # Minimum is 1, maximum is 100,000.
166
229
  max_links: nil,
230
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
231
+ # returned and counted against maxLinks.
232
+ url_regex: nil,
167
233
  request_options: {}
168
234
  )
169
235
  end
@@ -14,8 +14,6 @@ module ContextDev
14
14
 
15
15
  attr_reader ai: ContextDev::Resources::AI
16
16
 
17
- attr_reader style: ContextDev::Resources::Style
18
-
19
17
  attr_reader brand: ContextDev::Resources::Brand
20
18
 
21
19
  attr_reader industry: ContextDev::Resources::Industry
@@ -1,10 +1,10 @@
1
1
  module ContextDev
2
2
  module Models
3
- type style_extract_styleguide_params =
3
+ type web_extract_fonts_params =
4
4
  { direct_url: String, domain: String, timeout_ms: Integer }
5
5
  & ContextDev::Internal::Type::request_parameters
6
6
 
7
- class StyleExtractStyleguideParams < ContextDev::Internal::Type::BaseModel
7
+ class WebExtractFontsParams < ContextDev::Internal::Type::BaseModel
8
8
  extend ContextDev::Internal::Type::RequestParameters::Converter
9
9
  include ContextDev::Internal::Type::RequestParameters
10
10