context.dev 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e9ffccdbada64211d1e7ddc15ce648d6e4ec77637768a30f71d754d9a8ff788
4
- data.tar.gz: a95c5065d4f22c3a1269026fd93b936086316cb9a1a9e8de5af8e7c6021f0bcc
3
+ metadata.gz: cdcc91e409d0287f98b023ccbf680145a44d7c7ac9dffbf7be5fd1251fdbcc97
4
+ data.tar.gz: f5b48e008a3ed93e386513f9fcd8d215888ce5f8adfbc909cac3adcc5b313a27
5
5
  SHA512:
6
- metadata.gz: 1bf660423aa6381d2301614c3987aedcf0edf8bec72bd2eca8b1ee88d4190cb5a4f8c8dd453783632ad1ed79da87bddbfadf09341c870947cba5784d08600610
7
- data.tar.gz: aa6851f96a3b0295cbc4386ea424b6e4f856889aac74bcd21e8ad7fd6735f5822ae5ed68306891ef3779dfdf1b590fcc32ffb8f069bd78599f8eacb4c6b5b882
6
+ metadata.gz: 37ab631ac032f9e15ff462159936d5a3b0e572890552853c4a22603530ae9f56cece10ee3e67a4a4a6e46f7b5f3bbbadcd6beebd2d8bf183243206e97e4f812c
7
+ data.tar.gz: 80757fb5ae2d334ec0716246dfd4702754fb442f6bb2b367b41419cd4f6274d6971b6c5ca7eeb5ff18972eda0c6d2bca9d2f609222e44c97627e1d5f3cc22777
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.7.0 (2026-04-24)
4
+
5
+ Full Changelog: [v1.6.0...v1.7.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.6.0...v1.7.0)
6
+
7
+ ### Features
8
+
9
+ * **api:** api update ([9c8b4d9](https://github.com/context-dot-dev/context-ruby-sdk/commit/9c8b4d9b1d813c5b7992998a3fa23cde63fe2f4c))
10
+
3
11
  ## 1.6.0 (2026-04-23)
4
12
 
5
13
  Full Changelog: [v1.5.0...v1.6.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.5.0...v1.6.0)
data/README.md CHANGED
@@ -26,7 +26,7 @@ To use this gem, install via Bundler by adding the following to your application
26
26
  <!-- x-release-please-start-version -->
27
27
 
28
28
  ```ruby
29
- gem "context.dev", "~> 1.6.0"
29
+ gem "context.dev", "~> 1.7.0"
30
30
  ```
31
31
 
32
32
  <!-- x-release-please-end -->
@@ -53,6 +53,14 @@ module ContextDev
53
53
  # @return [Integer, nil]
54
54
  optional :max_pages, Integer, api_name: :maxPages
55
55
 
56
+ # @!attribute parse_pdf
57
+ # When true (default), PDF pages are fetched and their text layer is extracted and
58
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
59
+ # entirely (not included in results and not counted as failures).
60
+ #
61
+ # @return [Boolean, nil]
62
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
63
+
56
64
  # @!attribute shorten_base64_images
57
65
  # Truncate base64-encoded image data in the Markdown output
58
66
  #
@@ -72,7 +80,7 @@ module ContextDev
72
80
  # @return [Boolean, nil]
73
81
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
74
82
 
75
- # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
83
+ # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
76
84
  # Some parameter documentation has been truncated, see
77
85
  # {ContextDev::Models::WebWebCrawlMdParams} for more details.
78
86
  #
@@ -90,6 +98,8 @@ module ContextDev
90
98
  #
91
99
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
92
100
  #
101
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
102
+ #
93
103
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
94
104
  #
95
105
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -33,6 +33,12 @@ module ContextDev
33
33
  # @return [Integer]
34
34
  required :num_failed, Integer, api_name: :numFailed
35
35
 
36
+ # @!attribute num_skipped
37
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
38
+ #
39
+ # @return [Integer]
40
+ required :num_skipped, Integer, api_name: :numSkipped
41
+
36
42
  # @!attribute num_succeeded
37
43
  # Number of pages successfully crawled
38
44
  #
@@ -45,11 +51,16 @@ module ContextDev
45
51
  # @return [Integer]
46
52
  required :num_urls, Integer, api_name: :numUrls
47
53
 
48
- # @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
54
+ # @!method initialize(max_crawl_depth:, num_failed:, num_skipped:, num_succeeded:, num_urls:)
55
+ # Some parameter documentation has been truncated, see
56
+ # {ContextDev::Models::WebWebCrawlMdResponse::Metadata} for more details.
57
+ #
49
58
  # @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
50
59
  #
51
60
  # @param num_failed [Integer] Number of pages that failed to crawl
52
61
  #
62
+ # @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
63
+ #
53
64
  # @param num_succeeded [Integer] Number of pages successfully crawled
54
65
  #
55
66
  # @param num_urls [Integer] Total number of URLs crawled
@@ -21,7 +21,15 @@ module ContextDev
21
21
  # @return [Integer, nil]
22
22
  optional :max_age_ms, Integer
23
23
 
24
- # @!method initialize(url:, max_age_ms: nil, request_options: {})
24
+ # @!attribute parse_pdf
25
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
26
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
27
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
28
+ #
29
+ # @return [Boolean, nil]
30
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
31
+
32
+ # @!method initialize(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
25
33
  # Some parameter documentation has been truncated, see
26
34
  # {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
27
35
  #
@@ -29,6 +37,8 @@ module ContextDev
29
37
  #
30
38
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
31
39
  #
40
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
41
+ #
32
42
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
33
43
  end
34
44
  end
@@ -34,6 +34,14 @@ module ContextDev
34
34
  # @return [Integer, nil]
35
35
  optional :max_age_ms, Integer
36
36
 
37
+ # @!attribute parse_pdf
38
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
39
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
40
+ # WEBSITE_ACCESS_ERROR is returned.
41
+ #
42
+ # @return [Boolean, nil]
43
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
44
+
37
45
  # @!attribute shorten_base64_images
38
46
  # Shorten base64-encoded image data in the Markdown output
39
47
  #
@@ -47,7 +55,7 @@ module ContextDev
47
55
  # @return [Boolean, nil]
48
56
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean
49
57
 
50
- # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
58
+ # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
51
59
  # Some parameter documentation has been truncated, see
52
60
  # {ContextDev::Models::WebWebScrapeMdParams} for more details.
53
61
  #
@@ -59,6 +67,8 @@ module ContextDev
59
67
  #
60
68
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
61
69
  #
70
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
71
+ #
62
72
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
63
73
  #
64
74
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -105,7 +105,7 @@ module ContextDev
105
105
  # Performs a crawl starting from a given URL, extracts page content as Markdown,
106
106
  # and returns results for all crawled pages.
107
107
  #
108
- # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
108
+ # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
109
109
  #
110
110
  # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
111
111
  #
@@ -121,6 +121,8 @@ module ContextDev
121
121
  #
122
122
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
123
123
  #
124
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
125
+ #
124
126
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
125
127
  #
126
128
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -148,12 +150,14 @@ module ContextDev
148
150
  #
149
151
  # Scrapes the given URL and returns the raw HTML content of the page.
150
152
  #
151
- # @overload web_scrape_html(url:, max_age_ms: nil, request_options: {})
153
+ # @overload web_scrape_html(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
152
154
  #
153
155
  # @param url [String] Full URL to scrape (must include http:// or https:// protocol)
154
156
  #
155
157
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
156
158
  #
159
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
160
+ #
157
161
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
158
162
  #
159
163
  # @return [ContextDev::Models::WebWebScrapeHTMLResponse]
@@ -165,7 +169,7 @@ module ContextDev
165
169
  @client.request(
166
170
  method: :get,
167
171
  path: "web/scrape/html",
168
- query: query.transform_keys(max_age_ms: "maxAgeMs"),
172
+ query: query.transform_keys(max_age_ms: "maxAgeMs", parse_pdf: "parsePDF"),
169
173
  model: ContextDev::Models::WebWebScrapeHTMLResponse,
170
174
  options: options
171
175
  )
@@ -201,7 +205,7 @@ module ContextDev
201
205
  #
202
206
  # Scrapes the given URL into LLM usable Markdown.
203
207
  #
204
- # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
208
+ # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
205
209
  #
206
210
  # @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
207
211
  #
@@ -211,6 +215,8 @@ module ContextDev
211
215
  #
212
216
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
213
217
  #
218
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
219
+ #
214
220
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
215
221
  #
216
222
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -230,6 +236,7 @@ module ContextDev
230
236
  include_images: "includeImages",
231
237
  include_links: "includeLinks",
232
238
  max_age_ms: "maxAgeMs",
239
+ parse_pdf: "parsePDF",
233
240
  shorten_base64_images: "shortenBase64Images",
234
241
  use_main_content_only: "useMainContentOnly"
235
242
  ),
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ContextDev
4
- VERSION = "1.6.0"
4
+ VERSION = "1.7.0"
5
5
  end
@@ -61,6 +61,15 @@ module ContextDev
61
61
  sig { params(max_pages: Integer).void }
62
62
  attr_writer :max_pages
63
63
 
64
+ # When true (default), PDF pages are fetched and their text layer is extracted and
65
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
66
+ # entirely (not included in results and not counted as failures).
67
+ sig { returns(T.nilable(T::Boolean)) }
68
+ attr_reader :parse_pdf
69
+
70
+ sig { params(parse_pdf: T::Boolean).void }
71
+ attr_writer :parse_pdf
72
+
64
73
  # Truncate base64-encoded image data in the Markdown output
65
74
  sig { returns(T.nilable(T::Boolean)) }
66
75
  attr_reader :shorten_base64_images
@@ -92,6 +101,7 @@ module ContextDev
92
101
  max_age_ms: Integer,
93
102
  max_depth: Integer,
94
103
  max_pages: Integer,
104
+ parse_pdf: T::Boolean,
95
105
  shorten_base64_images: T::Boolean,
96
106
  url_regex: String,
97
107
  use_main_content_only: T::Boolean,
@@ -117,6 +127,10 @@ module ContextDev
117
127
  max_depth: nil,
118
128
  # Maximum number of pages to crawl. Hard cap: 500.
119
129
  max_pages: nil,
130
+ # When true (default), PDF pages are fetched and their text layer is extracted and
131
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
132
+ # entirely (not included in results and not counted as failures).
133
+ parse_pdf: nil,
120
134
  # Truncate base64-encoded image data in the Markdown output
121
135
  shorten_base64_images: nil,
122
136
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -138,6 +152,7 @@ module ContextDev
138
152
  max_age_ms: Integer,
139
153
  max_depth: Integer,
140
154
  max_pages: Integer,
155
+ parse_pdf: T::Boolean,
141
156
  shorten_base64_images: T::Boolean,
142
157
  url_regex: String,
143
158
  use_main_content_only: T::Boolean,
@@ -64,6 +64,10 @@ module ContextDev
64
64
  sig { returns(Integer) }
65
65
  attr_accessor :num_failed
66
66
 
67
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
68
+ sig { returns(Integer) }
69
+ attr_accessor :num_skipped
70
+
67
71
  # Number of pages successfully crawled
68
72
  sig { returns(Integer) }
69
73
  attr_accessor :num_succeeded
@@ -76,6 +80,7 @@ module ContextDev
76
80
  params(
77
81
  max_crawl_depth: Integer,
78
82
  num_failed: Integer,
83
+ num_skipped: Integer,
79
84
  num_succeeded: Integer,
80
85
  num_urls: Integer
81
86
  ).returns(T.attached_class)
@@ -85,6 +90,8 @@ module ContextDev
85
90
  max_crawl_depth:,
86
91
  # Number of pages that failed to crawl
87
92
  num_failed:,
93
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
94
+ num_skipped:,
88
95
  # Number of pages successfully crawled
89
96
  num_succeeded:,
90
97
  # Total number of URLs crawled
@@ -97,6 +104,7 @@ module ContextDev
97
104
  {
98
105
  max_crawl_depth: Integer,
99
106
  num_failed: Integer,
107
+ num_skipped: Integer,
100
108
  num_succeeded: Integer,
101
109
  num_urls: Integer
102
110
  }
@@ -27,10 +27,20 @@ module ContextDev
27
27
  sig { params(max_age_ms: Integer).void }
28
28
  attr_writer :max_age_ms
29
29
 
30
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
31
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
32
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
33
+ sig { returns(T.nilable(T::Boolean)) }
34
+ attr_reader :parse_pdf
35
+
36
+ sig { params(parse_pdf: T::Boolean).void }
37
+ attr_writer :parse_pdf
38
+
30
39
  sig do
31
40
  params(
32
41
  url: String,
33
42
  max_age_ms: Integer,
43
+ parse_pdf: T::Boolean,
34
44
  request_options: ContextDev::RequestOptions::OrHash
35
45
  ).returns(T.attached_class)
36
46
  end
@@ -41,6 +51,10 @@ module ContextDev
41
51
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
42
52
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
43
53
  max_age_ms: nil,
54
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
55
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
56
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
57
+ parse_pdf: nil,
44
58
  request_options: {}
45
59
  )
46
60
  end
@@ -50,6 +64,7 @@ module ContextDev
50
64
  {
51
65
  url: String,
52
66
  max_age_ms: Integer,
67
+ parse_pdf: T::Boolean,
53
68
  request_options: ContextDev::RequestOptions
54
69
  }
55
70
  )
@@ -39,6 +39,15 @@ module ContextDev
39
39
  sig { params(max_age_ms: Integer).void }
40
40
  attr_writer :max_age_ms
41
41
 
42
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
43
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
44
+ # WEBSITE_ACCESS_ERROR is returned.
45
+ sig { returns(T.nilable(T::Boolean)) }
46
+ attr_reader :parse_pdf
47
+
48
+ sig { params(parse_pdf: T::Boolean).void }
49
+ attr_writer :parse_pdf
50
+
42
51
  # Shorten base64-encoded image data in the Markdown output
43
52
  sig { returns(T.nilable(T::Boolean)) }
44
53
  attr_reader :shorten_base64_images
@@ -60,6 +69,7 @@ module ContextDev
60
69
  include_images: T::Boolean,
61
70
  include_links: T::Boolean,
62
71
  max_age_ms: Integer,
72
+ parse_pdf: T::Boolean,
63
73
  shorten_base64_images: T::Boolean,
64
74
  use_main_content_only: T::Boolean,
65
75
  request_options: ContextDev::RequestOptions::OrHash
@@ -77,6 +87,10 @@ module ContextDev
77
87
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
78
88
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
79
89
  max_age_ms: nil,
90
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
91
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
92
+ # WEBSITE_ACCESS_ERROR is returned.
93
+ parse_pdf: nil,
80
94
  # Shorten base64-encoded image data in the Markdown output
81
95
  shorten_base64_images: nil,
82
96
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -93,6 +107,7 @@ module ContextDev
93
107
  include_images: T::Boolean,
94
108
  include_links: T::Boolean,
95
109
  max_age_ms: Integer,
110
+ parse_pdf: T::Boolean,
96
111
  shorten_base64_images: T::Boolean,
97
112
  use_main_content_only: T::Boolean,
98
113
  request_options: ContextDev::RequestOptions
@@ -108,6 +108,7 @@ module ContextDev
108
108
  max_age_ms: Integer,
109
109
  max_depth: Integer,
110
110
  max_pages: Integer,
111
+ parse_pdf: T::Boolean,
111
112
  shorten_base64_images: T::Boolean,
112
113
  url_regex: String,
113
114
  use_main_content_only: T::Boolean,
@@ -133,6 +134,10 @@ module ContextDev
133
134
  max_depth: nil,
134
135
  # Maximum number of pages to crawl. Hard cap: 500.
135
136
  max_pages: nil,
137
+ # When true (default), PDF pages are fetched and their text layer is extracted and
138
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
139
+ # entirely (not included in results and not counted as failures).
140
+ parse_pdf: nil,
136
141
  # Truncate base64-encoded image data in the Markdown output
137
142
  shorten_base64_images: nil,
138
143
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -149,6 +154,7 @@ module ContextDev
149
154
  params(
150
155
  url: String,
151
156
  max_age_ms: Integer,
157
+ parse_pdf: T::Boolean,
152
158
  request_options: ContextDev::RequestOptions::OrHash
153
159
  ).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
154
160
  end
@@ -159,6 +165,10 @@ module ContextDev
159
165
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
160
166
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
161
167
  max_age_ms: nil,
168
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
169
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
170
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
171
+ parse_pdf: nil,
162
172
  request_options: {}
163
173
  )
164
174
  end
@@ -186,6 +196,7 @@ module ContextDev
186
196
  include_images: T::Boolean,
187
197
  include_links: T::Boolean,
188
198
  max_age_ms: Integer,
199
+ parse_pdf: T::Boolean,
189
200
  shorten_base64_images: T::Boolean,
190
201
  use_main_content_only: T::Boolean,
191
202
  request_options: ContextDev::RequestOptions::OrHash
@@ -203,6 +214,10 @@ module ContextDev
203
214
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
204
215
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
205
216
  max_age_ms: nil,
217
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
218
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
219
+ # WEBSITE_ACCESS_ERROR is returned.
220
+ parse_pdf: nil,
206
221
  # Shorten base64-encoded image data in the Markdown output
207
222
  shorten_base64_images: nil,
208
223
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -9,6 +9,7 @@ module ContextDev
9
9
  max_age_ms: Integer,
10
10
  max_depth: Integer,
11
11
  max_pages: Integer,
12
+ parse_pdf: bool,
12
13
  :shorten_base64_images => bool,
13
14
  url_regex: String,
14
15
  use_main_content_only: bool
@@ -45,6 +46,10 @@ module ContextDev
45
46
 
46
47
  def max_pages=: (Integer) -> Integer
47
48
 
49
+ attr_reader parse_pdf: bool?
50
+
51
+ def parse_pdf=: (bool) -> bool
52
+
48
53
  attr_reader shorten_base64_images: bool?
49
54
 
50
55
  def shorten_base64_images=: (bool) -> bool
@@ -65,6 +70,7 @@ module ContextDev
65
70
  ?max_age_ms: Integer,
66
71
  ?max_depth: Integer,
67
72
  ?max_pages: Integer,
73
+ ?parse_pdf: bool,
68
74
  ?shorten_base64_images: bool,
69
75
  ?url_regex: String,
70
76
  ?use_main_content_only: bool,
@@ -79,6 +85,7 @@ module ContextDev
79
85
  max_age_ms: Integer,
80
86
  max_depth: Integer,
81
87
  max_pages: Integer,
88
+ parse_pdf: bool,
82
89
  :shorten_base64_images => bool,
83
90
  url_regex: String,
84
91
  use_main_content_only: bool,
@@ -25,6 +25,7 @@ module ContextDev
25
25
  {
26
26
  max_crawl_depth: Integer,
27
27
  num_failed: Integer,
28
+ num_skipped: Integer,
28
29
  num_succeeded: Integer,
29
30
  num_urls: Integer
30
31
  }
@@ -34,6 +35,8 @@ module ContextDev
34
35
 
35
36
  attr_accessor num_failed: Integer
36
37
 
38
+ attr_accessor num_skipped: Integer
39
+
37
40
  attr_accessor num_succeeded: Integer
38
41
 
39
42
  attr_accessor num_urls: Integer
@@ -41,6 +44,7 @@ module ContextDev
41
44
  def initialize: (
42
45
  max_crawl_depth: Integer,
43
46
  num_failed: Integer,
47
+ num_skipped: Integer,
44
48
  num_succeeded: Integer,
45
49
  num_urls: Integer
46
50
  ) -> void
@@ -48,6 +52,7 @@ module ContextDev
48
52
  def to_hash: -> {
49
53
  max_crawl_depth: Integer,
50
54
  num_failed: Integer,
55
+ num_skipped: Integer,
51
56
  num_succeeded: Integer,
52
57
  num_urls: Integer
53
58
  }
@@ -1,7 +1,7 @@
1
1
  module ContextDev
2
2
  module Models
3
3
  type web_web_scrape_html_params =
4
- { url: String, max_age_ms: Integer }
4
+ { url: String, max_age_ms: Integer, parse_pdf: bool }
5
5
  & ContextDev::Internal::Type::request_parameters
6
6
 
7
7
  class WebWebScrapeHTMLParams < ContextDev::Internal::Type::BaseModel
@@ -14,15 +14,21 @@ module ContextDev
14
14
 
15
15
  def max_age_ms=: (Integer) -> Integer
16
16
 
17
+ attr_reader parse_pdf: bool?
18
+
19
+ def parse_pdf=: (bool) -> bool
20
+
17
21
  def initialize: (
18
22
  url: String,
19
23
  ?max_age_ms: Integer,
24
+ ?parse_pdf: bool,
20
25
  ?request_options: ContextDev::request_opts
21
26
  ) -> void
22
27
 
23
28
  def to_hash: -> {
24
29
  url: String,
25
30
  max_age_ms: Integer,
31
+ parse_pdf: bool,
26
32
  request_options: ContextDev::RequestOptions
27
33
  }
28
34
  end
@@ -6,6 +6,7 @@ module ContextDev
6
6
  include_images: bool,
7
7
  include_links: bool,
8
8
  max_age_ms: Integer,
9
+ parse_pdf: bool,
9
10
  :shorten_base64_images => bool,
10
11
  use_main_content_only: bool
11
12
  }
@@ -29,6 +30,10 @@ module ContextDev
29
30
 
30
31
  def max_age_ms=: (Integer) -> Integer
31
32
 
33
+ attr_reader parse_pdf: bool?
34
+
35
+ def parse_pdf=: (bool) -> bool
36
+
32
37
  attr_reader shorten_base64_images: bool?
33
38
 
34
39
  def shorten_base64_images=: (bool) -> bool
@@ -42,6 +47,7 @@ module ContextDev
42
47
  ?include_images: bool,
43
48
  ?include_links: bool,
44
49
  ?max_age_ms: Integer,
50
+ ?parse_pdf: bool,
45
51
  ?shorten_base64_images: bool,
46
52
  ?use_main_content_only: bool,
47
53
  ?request_options: ContextDev::request_opts
@@ -52,6 +58,7 @@ module ContextDev
52
58
  include_images: bool,
53
59
  include_links: bool,
54
60
  max_age_ms: Integer,
61
+ parse_pdf: bool,
55
62
  :shorten_base64_images => bool,
56
63
  use_main_content_only: bool,
57
64
  request_options: ContextDev::RequestOptions
@@ -32,6 +32,7 @@ module ContextDev
32
32
  ?max_age_ms: Integer,
33
33
  ?max_depth: Integer,
34
34
  ?max_pages: Integer,
35
+ ?parse_pdf: bool,
35
36
  ?shorten_base64_images: bool,
36
37
  ?url_regex: String,
37
38
  ?use_main_content_only: bool,
@@ -41,6 +42,7 @@ module ContextDev
41
42
  def web_scrape_html: (
42
43
  url: String,
43
44
  ?max_age_ms: Integer,
45
+ ?parse_pdf: bool,
44
46
  ?request_options: ContextDev::request_opts
45
47
  ) -> ContextDev::Models::WebWebScrapeHTMLResponse
46
48
 
@@ -54,6 +56,7 @@ module ContextDev
54
56
  ?include_images: bool,
55
57
  ?include_links: bool,
56
58
  ?max_age_ms: Integer,
59
+ ?parse_pdf: bool,
57
60
  ?shorten_base64_images: bool,
58
61
  ?use_main_content_only: bool,
59
62
  ?request_options: ContextDev::request_opts
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: context.dev
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Context Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-23 00:00:00.000000000 Z
11
+ date: 2026-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cgi