context.dev 1.5.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3f1e1d592ab2aa15df4461bc35f04e15efa1b44be4b88d19089042a707ee48c9
4
- data.tar.gz: 7c2386bf4040b419009b3a68a366ff3d6a044116b71590d2036dfe69df4c24de
3
+ metadata.gz: cdcc91e409d0287f98b023ccbf680145a44d7c7ac9dffbf7be5fd1251fdbcc97
4
+ data.tar.gz: f5b48e008a3ed93e386513f9fcd8d215888ce5f8adfbc909cac3adcc5b313a27
5
5
  SHA512:
6
- metadata.gz: 701c64ee8193b73126b354a672c698f41e240d49c96885f0aef670b7ef78e4bad4ad59a21b1ed3d43f16b4a84911ae8931519c09b3702c03c7ec3e86c85a0ca8
7
- data.tar.gz: 74cc296a2d2d6d9a09a3105dc09813fca9de9ea5d98b193ab002fc07bd5cf169019897bff847fdb4291ccb0f4bbdddfd741256489642b42a317ade298e3f2dcd
6
+ metadata.gz: 37ab631ac032f9e15ff462159936d5a3b0e572890552853c4a22603530ae9f56cece10ee3e67a4a4a6e46f7b5f3bbbadcd6beebd2d8bf183243206e97e4f812c
7
+ data.tar.gz: 80757fb5ae2d334ec0716246dfd4702754fb442f6bb2b367b41419cd4f6274d6971b6c5ca7eeb5ff18972eda0c6d2bca9d2f609222e44c97627e1d5f3cc22777
data/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.7.0 (2026-04-24)
4
+
5
+ Full Changelog: [v1.6.0...v1.7.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.6.0...v1.7.0)
6
+
7
+ ### Features
8
+
9
+ * **api:** api update ([9c8b4d9](https://github.com/context-dot-dev/context-ruby-sdk/commit/9c8b4d9b1d813c5b7992998a3fa23cde63fe2f4c))
10
+
11
+ ## 1.6.0 (2026-04-23)
12
+
13
+ Full Changelog: [v1.5.0...v1.6.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.5.0...v1.6.0)
14
+
15
+ ### Features
16
+
17
+ * **api:** api update ([c1543f7](https://github.com/context-dot-dev/context-ruby-sdk/commit/c1543f7513b2d88b0c8ad8e209e62075bd9658bc))
18
+ * **api:** api update ([9c98e20](https://github.com/context-dot-dev/context-ruby-sdk/commit/9c98e20725dcda8466a2274b61be75943d0564d9))
19
+ * **api:** api update ([8ccafd9](https://github.com/context-dot-dev/context-ruby-sdk/commit/8ccafd9625079f40393fe23890958a80d8bdb4e8))
20
+
21
+
22
+ ### Chores
23
+
24
+ * **internal:** more robust bootstrap script ([57b2d93](https://github.com/context-dot-dev/context-ruby-sdk/commit/57b2d93877e5ceeb388679f239e21c09b087b2a1))
25
+
3
26
  ## 1.5.0 (2026-04-19)
4
27
 
5
28
  Full Changelog: [v1.4.0...v1.5.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.4.0...v1.5.0)
data/README.md CHANGED
@@ -26,7 +26,7 @@ To use this gem, install via Bundler by adding the following to your application
26
26
  <!-- x-release-please-start-version -->
27
27
 
28
28
  ```ruby
29
- gem "context.dev", "~> 1.5.0"
29
+ gem "context.dev", "~> 1.7.0"
30
30
  ```
31
31
 
32
32
  <!-- x-release-please-end -->
@@ -29,7 +29,20 @@ module ContextDev
29
29
  # @return [String]
30
30
  required :status, String
31
31
 
32
- # @!method initialize(code:, domain:, fonts:, status:)
32
+ # @!attribute font_links
33
+ # Font assets keyed by family name as it appears in the fonts array (non-generic
34
+ # names only). Clients match entries in fonts to pick a file URL from files.
35
+ # Omitted when no families resolve to Google or custom @font-face URLs.
36
+ #
37
+ # @return [Hash{Symbol=>ContextDev::Models::WebExtractFontsResponse::FontLink}, nil]
38
+ optional :font_links,
39
+ -> { ContextDev::Internal::Type::HashOf[ContextDev::Models::WebExtractFontsResponse::FontLink] },
40
+ api_name: :fontLinks
41
+
42
+ # @!method initialize(code:, domain:, fonts:, status:, font_links: nil)
43
+ # Some parameter documentation has been truncated, see
44
+ # {ContextDev::Models::WebExtractFontsResponse} for more details.
45
+ #
33
46
  # @param code [Integer] HTTP status code, e.g., 200
34
47
  #
35
48
  # @param domain [String] The normalized domain that was processed
@@ -37,6 +50,8 @@ module ContextDev
37
50
  # @param fonts [Array<ContextDev::Models::WebExtractFontsResponse::Font>] Array of font usage information
38
51
  #
39
52
  # @param status [String] Status of the response, e.g., 'ok'
53
+ #
54
+ # @param font_links [Hash{Symbol=>ContextDev::Models::WebExtractFontsResponse::FontLink}] Font assets keyed by family name as it appears in the fonts array (non-generic n
40
55
 
41
56
  class Font < ContextDev::Internal::Type::BaseModel
42
57
  # @!attribute fallbacks
@@ -96,6 +111,58 @@ module ContextDev
96
111
  #
97
112
  # @param uses [Array<String>] Array of CSS selectors or element types where this font is used
98
113
  end
114
+
115
+ class FontLink < ContextDev::Internal::Type::BaseModel
116
+ # @!attribute files
117
+ # Upright font files keyed by weight string (e.g. "400" for regular, "500",
118
+ # "700"). Values are absolute URLs.
119
+ #
120
+ # @return [Hash{Symbol=>String}]
121
+ required :files, ContextDev::Internal::Type::HashOf[String]
122
+
123
+ # @!attribute type
124
+ #
125
+ # @return [Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink::Type]
126
+ required :type, enum: -> { ContextDev::Models::WebExtractFontsResponse::FontLink::Type }
127
+
128
+ # @!attribute category
129
+ # Google Fonts category when type is google (e.g. sans-serif, serif, monospace,
130
+ # display, handwriting). Omitted for custom fonts when unknown.
131
+ #
132
+ # @return [String, nil]
133
+ optional :category, String
134
+
135
+ # @!attribute display_name
136
+ # Present when type is custom: human-readable name derived from the fontLinks key
137
+ # (strip build/hash suffixes, split camelCase / PascalCase, normalize separators).
138
+ # Google entries omit this.
139
+ #
140
+ # @return [String, nil]
141
+ optional :display_name, String, api_name: :displayName
142
+
143
+ # @!method initialize(files:, type:, category: nil, display_name: nil)
144
+ # Some parameter documentation has been truncated, see
145
+ # {ContextDev::Models::WebExtractFontsResponse::FontLink} for more details.
146
+ #
147
+ # @param files [Hash{Symbol=>String}] Upright font files keyed by weight string (e.g. "400" for regular, "500", "700")
148
+ #
149
+ # @param type [Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink::Type]
150
+ #
151
+ # @param category [String] Google Fonts category when type is google (e.g. sans-serif, serif, monospace, di
152
+ #
153
+ # @param display_name [String] Present when type is custom: human-readable name derived from the fontLinks key
154
+
155
+ # @see ContextDev::Models::WebExtractFontsResponse::FontLink#type
156
+ module Type
157
+ extend ContextDev::Internal::Type::Enum
158
+
159
+ GOOGLE = :google
160
+ CUSTOM = :custom
161
+
162
+ # @!method self.values
163
+ # @return [Array<Symbol>]
164
+ end
165
+ end
99
166
  end
100
167
  end
101
168
  end
@@ -33,6 +33,14 @@ module ContextDev
33
33
  # @return [Boolean, nil]
34
34
  optional :include_links, ContextDev::Internal::Type::Boolean, api_name: :includeLinks
35
35
 
36
+ # @!attribute max_age_ms
37
+ # Return a cached result if a prior scrape for the same parameters exists and is
38
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
39
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
40
+ #
41
+ # @return [Integer, nil]
42
+ optional :max_age_ms, Integer, api_name: :maxAgeMs
43
+
36
44
  # @!attribute max_depth
37
45
  # Maximum link depth from the starting URL (0 = only the starting page)
38
46
  #
@@ -45,6 +53,14 @@ module ContextDev
45
53
  # @return [Integer, nil]
46
54
  optional :max_pages, Integer, api_name: :maxPages
47
55
 
56
+ # @!attribute parse_pdf
57
+ # When true (default), PDF pages are fetched and their text layer is extracted and
58
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
59
+ # entirely (not included in results and not counted as failures).
60
+ #
61
+ # @return [Boolean, nil]
62
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
63
+
48
64
  # @!attribute shorten_base64_images
49
65
  # Truncate base64-encoded image data in the Markdown output
50
66
  #
@@ -64,7 +80,7 @@ module ContextDev
64
80
  # @return [Boolean, nil]
65
81
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
66
82
 
67
- # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
83
+ # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
68
84
  # Some parameter documentation has been truncated, see
69
85
  # {ContextDev::Models::WebWebCrawlMdParams} for more details.
70
86
  #
@@ -76,10 +92,14 @@ module ContextDev
76
92
  #
77
93
  # @param include_links [Boolean] Preserve hyperlinks in the Markdown output
78
94
  #
95
+ # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
96
+ #
79
97
  # @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
80
98
  #
81
99
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
82
100
  #
101
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
102
+ #
83
103
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
84
104
  #
85
105
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -33,6 +33,12 @@ module ContextDev
33
33
  # @return [Integer]
34
34
  required :num_failed, Integer, api_name: :numFailed
35
35
 
36
+ # @!attribute num_skipped
37
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
38
+ #
39
+ # @return [Integer]
40
+ required :num_skipped, Integer, api_name: :numSkipped
41
+
36
42
  # @!attribute num_succeeded
37
43
  # Number of pages successfully crawled
38
44
  #
@@ -45,11 +51,16 @@ module ContextDev
45
51
  # @return [Integer]
46
52
  required :num_urls, Integer, api_name: :numUrls
47
53
 
48
- # @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
54
+ # @!method initialize(max_crawl_depth:, num_failed:, num_skipped:, num_succeeded:, num_urls:)
55
+ # Some parameter documentation has been truncated, see
56
+ # {ContextDev::Models::WebWebCrawlMdResponse::Metadata} for more details.
57
+ #
49
58
  # @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
50
59
  #
51
60
  # @param num_failed [Integer] Number of pages that failed to crawl
52
61
  #
62
+ # @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
63
+ #
53
64
  # @param num_succeeded [Integer] Number of pages successfully crawled
54
65
  #
55
66
  # @param num_urls [Integer] Total number of URLs crawled
@@ -21,7 +21,15 @@ module ContextDev
21
21
  # @return [Integer, nil]
22
22
  optional :max_age_ms, Integer
23
23
 
24
- # @!method initialize(url:, max_age_ms: nil, request_options: {})
24
+ # @!attribute parse_pdf
25
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
26
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
27
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
28
+ #
29
+ # @return [Boolean, nil]
30
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
31
+
32
+ # @!method initialize(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
25
33
  # Some parameter documentation has been truncated, see
26
34
  # {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
27
35
  #
@@ -29,6 +37,8 @@ module ContextDev
29
37
  #
30
38
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
31
39
  #
40
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
41
+ #
32
42
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
33
43
  end
34
44
  end
@@ -34,6 +34,14 @@ module ContextDev
34
34
  # @return [Integer, nil]
35
35
  optional :max_age_ms, Integer
36
36
 
37
+ # @!attribute parse_pdf
38
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
39
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
40
+ # WEBSITE_ACCESS_ERROR is returned.
41
+ #
42
+ # @return [Boolean, nil]
43
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
44
+
37
45
  # @!attribute shorten_base64_images
38
46
  # Shorten base64-encoded image data in the Markdown output
39
47
  #
@@ -47,7 +55,7 @@ module ContextDev
47
55
  # @return [Boolean, nil]
48
56
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean
49
57
 
50
- # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
58
+ # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
51
59
  # Some parameter documentation has been truncated, see
52
60
  # {ContextDev::Models::WebWebScrapeMdParams} for more details.
53
61
  #
@@ -59,6 +67,8 @@ module ContextDev
59
67
  #
60
68
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
61
69
  #
70
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
71
+ #
62
72
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
63
73
  #
64
74
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -20,7 +20,14 @@ module ContextDev
20
20
  # @return [Integer, nil]
21
21
  optional :max_links, Integer
22
22
 
23
- # @!method initialize(domain:, max_links: nil, request_options: {})
23
+ # @!attribute url_regex
24
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
25
+ # returned and counted against maxLinks.
26
+ #
27
+ # @return [String, nil]
28
+ optional :url_regex, String
29
+
30
+ # @!method initialize(domain:, max_links: nil, url_regex: nil, request_options: {})
24
31
  # Some parameter documentation has been truncated, see
25
32
  # {ContextDev::Models::WebWebScrapeSitemapParams} for more details.
26
33
  #
@@ -28,6 +35,8 @@ module ContextDev
28
35
  #
29
36
  # @param max_links [Integer] Maximum number of links to return from the sitemap crawl. Defaults to 10,000. Mi
30
37
  #
38
+ # @param url_regex [String] Optional RE2-compatible regex pattern. Only URLs matching this pattern are retur
39
+ #
31
40
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
32
41
  end
33
42
  end
@@ -105,7 +105,7 @@ module ContextDev
105
105
  # Performs a crawl starting from a given URL, extracts page content as Markdown,
106
106
  # and returns results for all crawled pages.
107
107
  #
108
- # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
108
+ # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
109
109
  #
110
110
  # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
111
111
  #
@@ -115,10 +115,14 @@ module ContextDev
115
115
  #
116
116
  # @param include_links [Boolean] Preserve hyperlinks in the Markdown output
117
117
  #
118
+ # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
119
+ #
118
120
  # @param max_depth [Integer] Maximum link depth from the starting URL (0 = only the starting page)
119
121
  #
120
122
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
121
123
  #
124
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
125
+ #
122
126
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
123
127
  #
124
128
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -146,12 +150,14 @@ module ContextDev
146
150
  #
147
151
  # Scrapes the given URL and returns the raw HTML content of the page.
148
152
  #
149
- # @overload web_scrape_html(url:, max_age_ms: nil, request_options: {})
153
+ # @overload web_scrape_html(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
150
154
  #
151
155
  # @param url [String] Full URL to scrape (must include http:// or https:// protocol)
152
156
  #
153
157
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
154
158
  #
159
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
160
+ #
155
161
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
156
162
  #
157
163
  # @return [ContextDev::Models::WebWebScrapeHTMLResponse]
@@ -163,7 +169,7 @@ module ContextDev
163
169
  @client.request(
164
170
  method: :get,
165
171
  path: "web/scrape/html",
166
- query: query.transform_keys(max_age_ms: "maxAgeMs"),
172
+ query: query.transform_keys(max_age_ms: "maxAgeMs", parse_pdf: "parsePDF"),
167
173
  model: ContextDev::Models::WebWebScrapeHTMLResponse,
168
174
  options: options
169
175
  )
@@ -199,7 +205,7 @@ module ContextDev
199
205
  #
200
206
  # Scrapes the given URL into LLM usable Markdown.
201
207
  #
202
- # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
208
+ # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
203
209
  #
204
210
  # @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
205
211
  #
@@ -209,6 +215,8 @@ module ContextDev
209
215
  #
210
216
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
211
217
  #
218
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
219
+ #
212
220
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
213
221
  #
214
222
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -228,6 +236,7 @@ module ContextDev
228
236
  include_images: "includeImages",
229
237
  include_links: "includeLinks",
230
238
  max_age_ms: "maxAgeMs",
239
+ parse_pdf: "parsePDF",
231
240
  shorten_base64_images: "shortenBase64Images",
232
241
  use_main_content_only: "useMainContentOnly"
233
242
  ),
@@ -241,12 +250,14 @@ module ContextDev
241
250
  #
242
251
  # Crawl an entire website's sitemap and return all discovered page URLs.
243
252
  #
244
- # @overload web_scrape_sitemap(domain:, max_links: nil, request_options: {})
253
+ # @overload web_scrape_sitemap(domain:, max_links: nil, url_regex: nil, request_options: {})
245
254
  #
246
255
  # @param domain [String] Domain to build a sitemap for
247
256
  #
248
257
  # @param max_links [Integer] Maximum number of links to return from the sitemap crawl. Defaults to 10,000. Mi
249
258
  #
259
+ # @param url_regex [String] Optional RE2-compatible regex pattern. Only URLs matching this pattern are retur
260
+ #
250
261
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
251
262
  #
252
263
  # @return [ContextDev::Models::WebWebScrapeSitemapResponse]
@@ -258,7 +269,7 @@ module ContextDev
258
269
  @client.request(
259
270
  method: :get,
260
271
  path: "web/scrape/sitemap",
261
- query: query.transform_keys(max_links: "maxLinks"),
272
+ query: query.transform_keys(max_links: "maxLinks", url_regex: "urlRegex"),
262
273
  model: ContextDev::Models::WebWebScrapeSitemapResponse,
263
274
  options: options
264
275
  )
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ContextDev
4
- VERSION = "1.5.0"
4
+ VERSION = "1.7.0"
5
5
  end
@@ -29,13 +29,44 @@ module ContextDev
29
29
  sig { returns(String) }
30
30
  attr_accessor :status
31
31
 
32
+ # Font assets keyed by family name as it appears in the fonts array (non-generic
33
+ # names only). Clients match entries in fonts to pick a file URL from files.
34
+ # Omitted when no families resolve to Google or custom @font-face URLs.
35
+ sig do
36
+ returns(
37
+ T.nilable(
38
+ T::Hash[
39
+ Symbol,
40
+ ContextDev::Models::WebExtractFontsResponse::FontLink
41
+ ]
42
+ )
43
+ )
44
+ end
45
+ attr_reader :font_links
46
+
47
+ sig do
48
+ params(
49
+ font_links:
50
+ T::Hash[
51
+ Symbol,
52
+ ContextDev::Models::WebExtractFontsResponse::FontLink::OrHash
53
+ ]
54
+ ).void
55
+ end
56
+ attr_writer :font_links
57
+
32
58
  sig do
33
59
  params(
34
60
  code: Integer,
35
61
  domain: String,
36
62
  fonts:
37
63
  T::Array[ContextDev::Models::WebExtractFontsResponse::Font::OrHash],
38
- status: String
64
+ status: String,
65
+ font_links:
66
+ T::Hash[
67
+ Symbol,
68
+ ContextDev::Models::WebExtractFontsResponse::FontLink::OrHash
69
+ ]
39
70
  ).returns(T.attached_class)
40
71
  end
41
72
  def self.new(
@@ -46,7 +77,11 @@ module ContextDev
46
77
  # Array of font usage information
47
78
  fonts:,
48
79
  # Status of the response, e.g., 'ok'
49
- status:
80
+ status:,
81
+ # Font assets keyed by family name as it appears in the fonts array (non-generic
82
+ # names only). Clients match entries in fonts to pick a file URL from files.
83
+ # Omitted when no families resolve to Google or custom @font-face URLs.
84
+ font_links: nil
50
85
  )
51
86
  end
52
87
 
@@ -56,7 +91,12 @@ module ContextDev
56
91
  code: Integer,
57
92
  domain: String,
58
93
  fonts: T::Array[ContextDev::Models::WebExtractFontsResponse::Font],
59
- status: String
94
+ status: String,
95
+ font_links:
96
+ T::Hash[
97
+ Symbol,
98
+ ContextDev::Models::WebExtractFontsResponse::FontLink
99
+ ]
60
100
  }
61
101
  )
62
102
  end
@@ -145,6 +185,117 @@ module ContextDev
145
185
  def to_hash
146
186
  end
147
187
  end
188
+
189
+ class FontLink < ContextDev::Internal::Type::BaseModel
190
+ OrHash =
191
+ T.type_alias do
192
+ T.any(
193
+ ContextDev::Models::WebExtractFontsResponse::FontLink,
194
+ ContextDev::Internal::AnyHash
195
+ )
196
+ end
197
+
198
+ # Upright font files keyed by weight string (e.g. "400" for regular, "500",
199
+ # "700"). Values are absolute URLs.
200
+ sig { returns(T::Hash[Symbol, String]) }
201
+ attr_accessor :files
202
+
203
+ sig do
204
+ returns(
205
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::TaggedSymbol
206
+ )
207
+ end
208
+ attr_accessor :type
209
+
210
+ # Google Fonts category when type is google (e.g. sans-serif, serif, monospace,
211
+ # display, handwriting). Omitted for custom fonts when unknown.
212
+ sig { returns(T.nilable(String)) }
213
+ attr_reader :category
214
+
215
+ sig { params(category: String).void }
216
+ attr_writer :category
217
+
218
+ # Present when type is custom: human-readable name derived from the fontLinks key
219
+ # (strip build/hash suffixes, split camelCase / PascalCase, normalize separators).
220
+ # Google entries omit this.
221
+ sig { returns(T.nilable(String)) }
222
+ attr_reader :display_name
223
+
224
+ sig { params(display_name: String).void }
225
+ attr_writer :display_name
226
+
227
+ sig do
228
+ params(
229
+ files: T::Hash[Symbol, String],
230
+ type:
231
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::OrSymbol,
232
+ category: String,
233
+ display_name: String
234
+ ).returns(T.attached_class)
235
+ end
236
+ def self.new(
237
+ # Upright font files keyed by weight string (e.g. "400" for regular, "500",
238
+ # "700"). Values are absolute URLs.
239
+ files:,
240
+ type:,
241
+ # Google Fonts category when type is google (e.g. sans-serif, serif, monospace,
242
+ # display, handwriting). Omitted for custom fonts when unknown.
243
+ category: nil,
244
+ # Present when type is custom: human-readable name derived from the fontLinks key
245
+ # (strip build/hash suffixes, split camelCase / PascalCase, normalize separators).
246
+ # Google entries omit this.
247
+ display_name: nil
248
+ )
249
+ end
250
+
251
+ sig do
252
+ override.returns(
253
+ {
254
+ files: T::Hash[Symbol, String],
255
+ type:
256
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::TaggedSymbol,
257
+ category: String,
258
+ display_name: String
259
+ }
260
+ )
261
+ end
262
+ def to_hash
263
+ end
264
+
265
+ module Type
266
+ extend ContextDev::Internal::Type::Enum
267
+
268
+ TaggedSymbol =
269
+ T.type_alias do
270
+ T.all(
271
+ Symbol,
272
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type
273
+ )
274
+ end
275
+ OrSymbol = T.type_alias { T.any(Symbol, String) }
276
+
277
+ GOOGLE =
278
+ T.let(
279
+ :google,
280
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::TaggedSymbol
281
+ )
282
+ CUSTOM =
283
+ T.let(
284
+ :custom,
285
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::TaggedSymbol
286
+ )
287
+
288
+ sig do
289
+ override.returns(
290
+ T::Array[
291
+ ContextDev::Models::WebExtractFontsResponse::FontLink::Type::TaggedSymbol
292
+ ]
293
+ )
294
+ end
295
+ def self.values
296
+ end
297
+ end
298
+ end
148
299
  end
149
300
  end
150
301
  end
@@ -38,6 +38,15 @@ module ContextDev
38
38
  sig { params(include_links: T::Boolean).void }
39
39
  attr_writer :include_links
40
40
 
41
+ # Return a cached result if a prior scrape for the same parameters exists and is
42
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
43
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
44
+ sig { returns(T.nilable(Integer)) }
45
+ attr_reader :max_age_ms
46
+
47
+ sig { params(max_age_ms: Integer).void }
48
+ attr_writer :max_age_ms
49
+
41
50
  # Maximum link depth from the starting URL (0 = only the starting page)
42
51
  sig { returns(T.nilable(Integer)) }
43
52
  attr_reader :max_depth
@@ -52,6 +61,15 @@ module ContextDev
52
61
  sig { params(max_pages: Integer).void }
53
62
  attr_writer :max_pages
54
63
 
64
+ # When true (default), PDF pages are fetched and their text layer is extracted and
65
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
66
+ # entirely (not included in results and not counted as failures).
67
+ sig { returns(T.nilable(T::Boolean)) }
68
+ attr_reader :parse_pdf
69
+
70
+ sig { params(parse_pdf: T::Boolean).void }
71
+ attr_writer :parse_pdf
72
+
55
73
  # Truncate base64-encoded image data in the Markdown output
56
74
  sig { returns(T.nilable(T::Boolean)) }
57
75
  attr_reader :shorten_base64_images
@@ -80,8 +98,10 @@ module ContextDev
80
98
  follow_subdomains: T::Boolean,
81
99
  include_images: T::Boolean,
82
100
  include_links: T::Boolean,
101
+ max_age_ms: Integer,
83
102
  max_depth: Integer,
84
103
  max_pages: Integer,
104
+ parse_pdf: T::Boolean,
85
105
  shorten_base64_images: T::Boolean,
86
106
  url_regex: String,
87
107
  use_main_content_only: T::Boolean,
@@ -99,10 +119,18 @@ module ContextDev
99
119
  include_images: nil,
100
120
  # Preserve hyperlinks in the Markdown output
101
121
  include_links: nil,
122
+ # Return a cached result if a prior scrape for the same parameters exists and is
123
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
124
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
125
+ max_age_ms: nil,
102
126
  # Maximum link depth from the starting URL (0 = only the starting page)
103
127
  max_depth: nil,
104
128
  # Maximum number of pages to crawl. Hard cap: 500.
105
129
  max_pages: nil,
130
+ # When true (default), PDF pages are fetched and their text layer is extracted and
131
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
132
+ # entirely (not included in results and not counted as failures).
133
+ parse_pdf: nil,
106
134
  # Truncate base64-encoded image data in the Markdown output
107
135
  shorten_base64_images: nil,
108
136
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -121,8 +149,10 @@ module ContextDev
121
149
  follow_subdomains: T::Boolean,
122
150
  include_images: T::Boolean,
123
151
  include_links: T::Boolean,
152
+ max_age_ms: Integer,
124
153
  max_depth: Integer,
125
154
  max_pages: Integer,
155
+ parse_pdf: T::Boolean,
126
156
  shorten_base64_images: T::Boolean,
127
157
  url_regex: String,
128
158
  use_main_content_only: T::Boolean,
@@ -64,6 +64,10 @@ module ContextDev
64
64
  sig { returns(Integer) }
65
65
  attr_accessor :num_failed
66
66
 
67
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
68
+ sig { returns(Integer) }
69
+ attr_accessor :num_skipped
70
+
67
71
  # Number of pages successfully crawled
68
72
  sig { returns(Integer) }
69
73
  attr_accessor :num_succeeded
@@ -76,6 +80,7 @@ module ContextDev
76
80
  params(
77
81
  max_crawl_depth: Integer,
78
82
  num_failed: Integer,
83
+ num_skipped: Integer,
79
84
  num_succeeded: Integer,
80
85
  num_urls: Integer
81
86
  ).returns(T.attached_class)
@@ -85,6 +90,8 @@ module ContextDev
85
90
  max_crawl_depth:,
86
91
  # Number of pages that failed to crawl
87
92
  num_failed:,
93
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
94
+ num_skipped:,
88
95
  # Number of pages successfully crawled
89
96
  num_succeeded:,
90
97
  # Total number of URLs crawled
@@ -97,6 +104,7 @@ module ContextDev
97
104
  {
98
105
  max_crawl_depth: Integer,
99
106
  num_failed: Integer,
107
+ num_skipped: Integer,
100
108
  num_succeeded: Integer,
101
109
  num_urls: Integer
102
110
  }
@@ -27,10 +27,20 @@ module ContextDev
27
27
  sig { params(max_age_ms: Integer).void }
28
28
  attr_writer :max_age_ms
29
29
 
30
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
31
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
32
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
33
+ sig { returns(T.nilable(T::Boolean)) }
34
+ attr_reader :parse_pdf
35
+
36
+ sig { params(parse_pdf: T::Boolean).void }
37
+ attr_writer :parse_pdf
38
+
30
39
  sig do
31
40
  params(
32
41
  url: String,
33
42
  max_age_ms: Integer,
43
+ parse_pdf: T::Boolean,
34
44
  request_options: ContextDev::RequestOptions::OrHash
35
45
  ).returns(T.attached_class)
36
46
  end
@@ -41,6 +51,10 @@ module ContextDev
41
51
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
42
52
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
43
53
  max_age_ms: nil,
54
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
55
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
56
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
57
+ parse_pdf: nil,
44
58
  request_options: {}
45
59
  )
46
60
  end
@@ -50,6 +64,7 @@ module ContextDev
50
64
  {
51
65
  url: String,
52
66
  max_age_ms: Integer,
67
+ parse_pdf: T::Boolean,
53
68
  request_options: ContextDev::RequestOptions
54
69
  }
55
70
  )
@@ -39,6 +39,15 @@ module ContextDev
39
39
  sig { params(max_age_ms: Integer).void }
40
40
  attr_writer :max_age_ms
41
41
 
42
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
43
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
44
+ # WEBSITE_ACCESS_ERROR is returned.
45
+ sig { returns(T.nilable(T::Boolean)) }
46
+ attr_reader :parse_pdf
47
+
48
+ sig { params(parse_pdf: T::Boolean).void }
49
+ attr_writer :parse_pdf
50
+
42
51
  # Shorten base64-encoded image data in the Markdown output
43
52
  sig { returns(T.nilable(T::Boolean)) }
44
53
  attr_reader :shorten_base64_images
@@ -60,6 +69,7 @@ module ContextDev
60
69
  include_images: T::Boolean,
61
70
  include_links: T::Boolean,
62
71
  max_age_ms: Integer,
72
+ parse_pdf: T::Boolean,
63
73
  shorten_base64_images: T::Boolean,
64
74
  use_main_content_only: T::Boolean,
65
75
  request_options: ContextDev::RequestOptions::OrHash
@@ -77,6 +87,10 @@ module ContextDev
77
87
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
78
88
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
79
89
  max_age_ms: nil,
90
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
91
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
92
+ # WEBSITE_ACCESS_ERROR is returned.
93
+ parse_pdf: nil,
80
94
  # Shorten base64-encoded image data in the Markdown output
81
95
  shorten_base64_images: nil,
82
96
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -93,6 +107,7 @@ module ContextDev
93
107
  include_images: T::Boolean,
94
108
  include_links: T::Boolean,
95
109
  max_age_ms: Integer,
110
+ parse_pdf: T::Boolean,
96
111
  shorten_base64_images: T::Boolean,
97
112
  use_main_content_only: T::Boolean,
98
113
  request_options: ContextDev::RequestOptions
@@ -26,10 +26,19 @@ module ContextDev
26
26
  sig { params(max_links: Integer).void }
27
27
  attr_writer :max_links
28
28
 
29
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
30
+ # returned and counted against maxLinks.
31
+ sig { returns(T.nilable(String)) }
32
+ attr_reader :url_regex
33
+
34
+ sig { params(url_regex: String).void }
35
+ attr_writer :url_regex
36
+
29
37
  sig do
30
38
  params(
31
39
  domain: String,
32
40
  max_links: Integer,
41
+ url_regex: String,
33
42
  request_options: ContextDev::RequestOptions::OrHash
34
43
  ).returns(T.attached_class)
35
44
  end
@@ -39,6 +48,9 @@ module ContextDev
39
48
  # Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
40
49
  # Minimum is 1, maximum is 100,000.
41
50
  max_links: nil,
51
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
52
+ # returned and counted against maxLinks.
53
+ url_regex: nil,
42
54
  request_options: {}
43
55
  )
44
56
  end
@@ -48,6 +60,7 @@ module ContextDev
48
60
  {
49
61
  domain: String,
50
62
  max_links: Integer,
63
+ url_regex: String,
51
64
  request_options: ContextDev::RequestOptions
52
65
  }
53
66
  )
@@ -105,8 +105,10 @@ module ContextDev
105
105
  follow_subdomains: T::Boolean,
106
106
  include_images: T::Boolean,
107
107
  include_links: T::Boolean,
108
+ max_age_ms: Integer,
108
109
  max_depth: Integer,
109
110
  max_pages: Integer,
111
+ parse_pdf: T::Boolean,
110
112
  shorten_base64_images: T::Boolean,
111
113
  url_regex: String,
112
114
  use_main_content_only: T::Boolean,
@@ -124,10 +126,18 @@ module ContextDev
124
126
  include_images: nil,
125
127
  # Preserve hyperlinks in the Markdown output
126
128
  include_links: nil,
129
+ # Return a cached result if a prior scrape for the same parameters exists and is
130
+ # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
131
+ # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
132
+ max_age_ms: nil,
127
133
  # Maximum link depth from the starting URL (0 = only the starting page)
128
134
  max_depth: nil,
129
135
  # Maximum number of pages to crawl. Hard cap: 500.
130
136
  max_pages: nil,
137
+ # When true (default), PDF pages are fetched and their text layer is extracted and
138
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
139
+ # entirely (not included in results and not counted as failures).
140
+ parse_pdf: nil,
131
141
  # Truncate base64-encoded image data in the Markdown output
132
142
  shorten_base64_images: nil,
133
143
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -144,6 +154,7 @@ module ContextDev
144
154
  params(
145
155
  url: String,
146
156
  max_age_ms: Integer,
157
+ parse_pdf: T::Boolean,
147
158
  request_options: ContextDev::RequestOptions::OrHash
148
159
  ).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
149
160
  end
@@ -154,6 +165,10 @@ module ContextDev
154
165
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
155
166
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
156
167
  max_age_ms: nil,
168
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
169
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
170
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
171
+ parse_pdf: nil,
157
172
  request_options: {}
158
173
  )
159
174
  end
@@ -181,6 +196,7 @@ module ContextDev
181
196
  include_images: T::Boolean,
182
197
  include_links: T::Boolean,
183
198
  max_age_ms: Integer,
199
+ parse_pdf: T::Boolean,
184
200
  shorten_base64_images: T::Boolean,
185
201
  use_main_content_only: T::Boolean,
186
202
  request_options: ContextDev::RequestOptions::OrHash
@@ -198,6 +214,10 @@ module ContextDev
198
214
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
199
215
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
200
216
  max_age_ms: nil,
217
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
218
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
219
+ # WEBSITE_ACCESS_ERROR is returned.
220
+ parse_pdf: nil,
201
221
  # Shorten base64-encoded image data in the Markdown output
202
222
  shorten_base64_images: nil,
203
223
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -212,6 +232,7 @@ module ContextDev
212
232
  params(
213
233
  domain: String,
214
234
  max_links: Integer,
235
+ url_regex: String,
215
236
  request_options: ContextDev::RequestOptions::OrHash
216
237
  ).returns(ContextDev::Models::WebWebScrapeSitemapResponse)
217
238
  end
@@ -221,6 +242,9 @@ module ContextDev
221
242
  # Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
222
243
  # Minimum is 1, maximum is 100,000.
223
244
  max_links: nil,
245
+ # Optional RE2-compatible regex pattern. Only URLs matching this pattern are
246
+ # returned and counted against maxLinks.
247
+ url_regex: nil,
224
248
  request_options: {}
225
249
  )
226
250
  end
@@ -5,7 +5,8 @@ module ContextDev
5
5
  code: Integer,
6
6
  domain: String,
7
7
  fonts: ::Array[ContextDev::Models::WebExtractFontsResponse::Font],
8
- status: String
8
+ status: String,
9
+ font_links: ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]
9
10
  }
10
11
 
11
12
  class WebExtractFontsResponse < ContextDev::Internal::Type::BaseModel
@@ -17,18 +18,26 @@ module ContextDev
17
18
 
18
19
  attr_accessor status: String
19
20
 
21
+ attr_reader font_links: ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]?
22
+
23
+ def font_links=: (
24
+ ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]
25
+ ) -> ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]
26
+
20
27
  def initialize: (
21
28
  code: Integer,
22
29
  domain: String,
23
30
  fonts: ::Array[ContextDev::Models::WebExtractFontsResponse::Font],
24
- status: String
31
+ status: String,
32
+ ?font_links: ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]
25
33
  ) -> void
26
34
 
27
35
  def to_hash: -> {
28
36
  code: Integer,
29
37
  domain: String,
30
38
  fonts: ::Array[ContextDev::Models::WebExtractFontsResponse::Font],
31
- status: String
39
+ status: String,
40
+ font_links: ::Hash[Symbol, ContextDev::Models::WebExtractFontsResponse::FontLink]
32
41
  }
33
42
 
34
43
  type font =
@@ -77,6 +86,53 @@ module ContextDev
77
86
  uses: ::Array[String]
78
87
  }
79
88
  end
89
+
90
+ type font_link =
91
+ {
92
+ files: ::Hash[Symbol, String],
93
+ type: ContextDev::Models::WebExtractFontsResponse::FontLink::type_,
94
+ category: String,
95
+ display_name: String
96
+ }
97
+
98
+ class FontLink < ContextDev::Internal::Type::BaseModel
99
+ attr_accessor files: ::Hash[Symbol, String]
100
+
101
+ attr_accessor type: ContextDev::Models::WebExtractFontsResponse::FontLink::type_
102
+
103
+ attr_reader category: String?
104
+
105
+ def category=: (String) -> String
106
+
107
+ attr_reader display_name: String?
108
+
109
+ def display_name=: (String) -> String
110
+
111
+ def initialize: (
112
+ files: ::Hash[Symbol, String],
113
+ type: ContextDev::Models::WebExtractFontsResponse::FontLink::type_,
114
+ ?category: String,
115
+ ?display_name: String
116
+ ) -> void
117
+
118
+ def to_hash: -> {
119
+ files: ::Hash[Symbol, String],
120
+ type: ContextDev::Models::WebExtractFontsResponse::FontLink::type_,
121
+ category: String,
122
+ display_name: String
123
+ }
124
+
125
+ type type_ = :google | :custom
126
+
127
+ module Type
128
+ extend ContextDev::Internal::Type::Enum
129
+
130
+ GOOGLE: :google
131
+ CUSTOM: :custom
132
+
133
+ def self?.values: -> ::Array[ContextDev::Models::WebExtractFontsResponse::FontLink::type_]
134
+ end
135
+ end
80
136
  end
81
137
  end
82
138
  end
@@ -6,8 +6,10 @@ module ContextDev
6
6
  follow_subdomains: bool,
7
7
  include_images: bool,
8
8
  include_links: bool,
9
+ max_age_ms: Integer,
9
10
  max_depth: Integer,
10
11
  max_pages: Integer,
12
+ parse_pdf: bool,
11
13
  :shorten_base64_images => bool,
12
14
  url_regex: String,
13
15
  use_main_content_only: bool
@@ -32,6 +34,10 @@ module ContextDev
32
34
 
33
35
  def include_links=: (bool) -> bool
34
36
 
37
+ attr_reader max_age_ms: Integer?
38
+
39
+ def max_age_ms=: (Integer) -> Integer
40
+
35
41
  attr_reader max_depth: Integer?
36
42
 
37
43
  def max_depth=: (Integer) -> Integer
@@ -40,6 +46,10 @@ module ContextDev
40
46
 
41
47
  def max_pages=: (Integer) -> Integer
42
48
 
49
+ attr_reader parse_pdf: bool?
50
+
51
+ def parse_pdf=: (bool) -> bool
52
+
43
53
  attr_reader shorten_base64_images: bool?
44
54
 
45
55
  def shorten_base64_images=: (bool) -> bool
@@ -57,8 +67,10 @@ module ContextDev
57
67
  ?follow_subdomains: bool,
58
68
  ?include_images: bool,
59
69
  ?include_links: bool,
70
+ ?max_age_ms: Integer,
60
71
  ?max_depth: Integer,
61
72
  ?max_pages: Integer,
73
+ ?parse_pdf: bool,
62
74
  ?shorten_base64_images: bool,
63
75
  ?url_regex: String,
64
76
  ?use_main_content_only: bool,
@@ -70,8 +82,10 @@ module ContextDev
70
82
  follow_subdomains: bool,
71
83
  include_images: bool,
72
84
  include_links: bool,
85
+ max_age_ms: Integer,
73
86
  max_depth: Integer,
74
87
  max_pages: Integer,
88
+ parse_pdf: bool,
75
89
  :shorten_base64_images => bool,
76
90
  url_regex: String,
77
91
  use_main_content_only: bool,
@@ -25,6 +25,7 @@ module ContextDev
25
25
  {
26
26
  max_crawl_depth: Integer,
27
27
  num_failed: Integer,
28
+ num_skipped: Integer,
28
29
  num_succeeded: Integer,
29
30
  num_urls: Integer
30
31
  }
@@ -34,6 +35,8 @@ module ContextDev
34
35
 
35
36
  attr_accessor num_failed: Integer
36
37
 
38
+ attr_accessor num_skipped: Integer
39
+
37
40
  attr_accessor num_succeeded: Integer
38
41
 
39
42
  attr_accessor num_urls: Integer
@@ -41,6 +44,7 @@ module ContextDev
41
44
  def initialize: (
42
45
  max_crawl_depth: Integer,
43
46
  num_failed: Integer,
47
+ num_skipped: Integer,
44
48
  num_succeeded: Integer,
45
49
  num_urls: Integer
46
50
  ) -> void
@@ -48,6 +52,7 @@ module ContextDev
48
52
  def to_hash: -> {
49
53
  max_crawl_depth: Integer,
50
54
  num_failed: Integer,
55
+ num_skipped: Integer,
51
56
  num_succeeded: Integer,
52
57
  num_urls: Integer
53
58
  }
@@ -1,7 +1,7 @@
1
1
  module ContextDev
2
2
  module Models
3
3
  type web_web_scrape_html_params =
4
- { url: String, max_age_ms: Integer }
4
+ { url: String, max_age_ms: Integer, parse_pdf: bool }
5
5
  & ContextDev::Internal::Type::request_parameters
6
6
 
7
7
  class WebWebScrapeHTMLParams < ContextDev::Internal::Type::BaseModel
@@ -14,15 +14,21 @@ module ContextDev
14
14
 
15
15
  def max_age_ms=: (Integer) -> Integer
16
16
 
17
+ attr_reader parse_pdf: bool?
18
+
19
+ def parse_pdf=: (bool) -> bool
20
+
17
21
  def initialize: (
18
22
  url: String,
19
23
  ?max_age_ms: Integer,
24
+ ?parse_pdf: bool,
20
25
  ?request_options: ContextDev::request_opts
21
26
  ) -> void
22
27
 
23
28
  def to_hash: -> {
24
29
  url: String,
25
30
  max_age_ms: Integer,
31
+ parse_pdf: bool,
26
32
  request_options: ContextDev::RequestOptions
27
33
  }
28
34
  end
@@ -6,6 +6,7 @@ module ContextDev
6
6
  include_images: bool,
7
7
  include_links: bool,
8
8
  max_age_ms: Integer,
9
+ parse_pdf: bool,
9
10
  :shorten_base64_images => bool,
10
11
  use_main_content_only: bool
11
12
  }
@@ -29,6 +30,10 @@ module ContextDev
29
30
 
30
31
  def max_age_ms=: (Integer) -> Integer
31
32
 
33
+ attr_reader parse_pdf: bool?
34
+
35
+ def parse_pdf=: (bool) -> bool
36
+
32
37
  attr_reader shorten_base64_images: bool?
33
38
 
34
39
  def shorten_base64_images=: (bool) -> bool
@@ -42,6 +47,7 @@ module ContextDev
42
47
  ?include_images: bool,
43
48
  ?include_links: bool,
44
49
  ?max_age_ms: Integer,
50
+ ?parse_pdf: bool,
45
51
  ?shorten_base64_images: bool,
46
52
  ?use_main_content_only: bool,
47
53
  ?request_options: ContextDev::request_opts
@@ -52,6 +58,7 @@ module ContextDev
52
58
  include_images: bool,
53
59
  include_links: bool,
54
60
  max_age_ms: Integer,
61
+ parse_pdf: bool,
55
62
  :shorten_base64_images => bool,
56
63
  use_main_content_only: bool,
57
64
  request_options: ContextDev::RequestOptions
@@ -1,7 +1,7 @@
1
1
  module ContextDev
2
2
  module Models
3
3
  type web_web_scrape_sitemap_params =
4
- { domain: String, max_links: Integer }
4
+ { domain: String, max_links: Integer, url_regex: String }
5
5
  & ContextDev::Internal::Type::request_parameters
6
6
 
7
7
  class WebWebScrapeSitemapParams < ContextDev::Internal::Type::BaseModel
@@ -14,15 +14,21 @@ module ContextDev
14
14
 
15
15
  def max_links=: (Integer) -> Integer
16
16
 
17
+ attr_reader url_regex: String?
18
+
19
+ def url_regex=: (String) -> String
20
+
17
21
  def initialize: (
18
22
  domain: String,
19
23
  ?max_links: Integer,
24
+ ?url_regex: String,
20
25
  ?request_options: ContextDev::request_opts
21
26
  ) -> void
22
27
 
23
28
  def to_hash: -> {
24
29
  domain: String,
25
30
  max_links: Integer,
31
+ url_regex: String,
26
32
  request_options: ContextDev::RequestOptions
27
33
  }
28
34
  end
@@ -29,8 +29,10 @@ module ContextDev
29
29
  ?follow_subdomains: bool,
30
30
  ?include_images: bool,
31
31
  ?include_links: bool,
32
+ ?max_age_ms: Integer,
32
33
  ?max_depth: Integer,
33
34
  ?max_pages: Integer,
35
+ ?parse_pdf: bool,
34
36
  ?shorten_base64_images: bool,
35
37
  ?url_regex: String,
36
38
  ?use_main_content_only: bool,
@@ -40,6 +42,7 @@ module ContextDev
40
42
  def web_scrape_html: (
41
43
  url: String,
42
44
  ?max_age_ms: Integer,
45
+ ?parse_pdf: bool,
43
46
  ?request_options: ContextDev::request_opts
44
47
  ) -> ContextDev::Models::WebWebScrapeHTMLResponse
45
48
 
@@ -53,6 +56,7 @@ module ContextDev
53
56
  ?include_images: bool,
54
57
  ?include_links: bool,
55
58
  ?max_age_ms: Integer,
59
+ ?parse_pdf: bool,
56
60
  ?shorten_base64_images: bool,
57
61
  ?use_main_content_only: bool,
58
62
  ?request_options: ContextDev::request_opts
@@ -61,6 +65,7 @@ module ContextDev
61
65
  def web_scrape_sitemap: (
62
66
  domain: String,
63
67
  ?max_links: Integer,
68
+ ?url_regex: String,
64
69
  ?request_options: ContextDev::request_opts
65
70
  ) -> ContextDev::Models::WebWebScrapeSitemapResponse
66
71
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: context.dev
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.0
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Context Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-19 00:00:00.000000000 Z
11
+ date: 2026-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cgi