context.dev 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e47becb29d02dd213aee4b6b49429685c9d74c35f37d49c026b8bbabc87be191
4
- data.tar.gz: 2e76f0c796bf46d9ec0a1f764de47841eabf2cb290c71b279a9740b574393d98
3
+ metadata.gz: 5a6ec86b2fdf43b0c3f9b800d8b93558a0c93fdf23fd72fad27a46ba43f2dccc
4
+ data.tar.gz: 8d5533fd69c08516b77b15b9bdcdfafea6cd18f4be1a77d17daa20bad2a6f9b1
5
5
  SHA512:
6
- metadata.gz: 859d0da8ad2c2e9fd7d0bdb4b783b9e8c75f4323d3a599b42d28ad495342ff1187f6a502449b1a325090f3ccab3f76c1d855e25c5aaaee1e7e98d690e9ac44e4
7
- data.tar.gz: 5b30272a7da7dbfeb75475e067f86bb404313ee842aa86b973817ccf359dc07c53fe06890b2e2dc34b689da07ec3b12a2b77a1f871eeaa0c549196f1afbc873d
6
+ metadata.gz: e98fe1516e060eb780f470e8c5ddc29ba4fdea44c4b11ca40d79d2dfacc7009eb11c95360c19dc0f2864857cc2560ce91569d3587cdaacc8009f757b76a293a8
7
+ data.tar.gz: 515ce97f5f873d21cef0665843408f507ad5ee2221ae9308d336617e57b2980d0c8add55c46193f2e10c580819cf09654213d32d6f15558f17c6adc87114ef32
data/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.18.0 (2026-05-10)
4
+
5
+ Full Changelog: [v1.17.0...v1.18.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.17.0...v1.18.0)
6
+
7
+ ### Features
8
+
9
+ * **api:** api update ([b582c05](https://github.com/context-dot-dev/context-ruby-sdk/commit/b582c05376102bb0cb6f8d4d8c9a2cefdef8c1ec))
10
+ * **api:** api update ([4a4e4bb](https://github.com/context-dot-dev/context-ruby-sdk/commit/4a4e4bbc547662de263a307b213dd7eecd03a61d))
11
+ * **api:** manual updates ([ec963bb](https://github.com/context-dot-dev/context-ruby-sdk/commit/ec963bb99ac36d162552c76fb067e87144f21089))
12
+
3
13
  ## 1.17.0 (2026-05-09)
4
14
 
5
15
  Full Changelog: [v1.16.0...v1.17.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.16.0...v1.17.0)
data/README.md CHANGED
@@ -8,8 +8,8 @@ It is generated with [Stainless](https://www.stainless.com/).
8
8
 
9
9
  Use the Context Dev MCP Server to enable AI assistants to interact with this API, allowing them to explore endpoints, make test requests, and use documentation to help integrate this SDK into your application.
10
10
 
11
- [![Add to Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=context.dev-mcp&config=eyJuYW1lIjoiY29udGV4dC5kZXYtbWNwIiwidHJhbnNwb3J0IjoiaHR0cCIsInVybCI6Imh0dHBzOi8vY29udGV4dC1kZXYuc3RsbWNwLmNvbSIsImhlYWRlcnMiOnsieC1jb250ZXh0LWRldi1hcGkta2V5IjoiTXkgQVBJIEtleSJ9fQ)
12
- [![Install in VS Code](https://img.shields.io/badge/_-Add_to_VS_Code-blue?style=for-the-badge&logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGZpbGw9Im5vbmUiIHZpZXdCb3g9IjAgMCA0MCA0MCI+PHBhdGggZmlsbD0iI0VFRSIgZmlsbC1ydWxlPSJldmVub2RkIiBkPSJNMzAuMjM1IDM5Ljg4NGEyLjQ5MSAyLjQ5MSAwIDAgMS0xLjc4MS0uNzNMMTIuNyAyNC43OGwtMy40NiAyLjYyNC0zLjQwNiAyLjU4MmExLjY2NSAxLjY2NSAwIDAgMS0xLjA4Mi4zMzggMS42NjQgMS42NjQgMCAwIDEtMS4wNDYtLjQzMWwtMi4yLTJhMS42NjYgMS42NjYgMCAwIDEgMC0yLjQ2M0w3LjQ1OCAyMCA0LjY3IDE3LjQ1MyAxLjUwNyAxNC41N2ExLjY2NSAxLjY2NSAwIDAgMSAwLTIuNDYzbDIuMi0yYTEuNjY1IDEuNjY1IDAgMCAxIDIuMTMtLjA5N2w2Ljg2MyA1LjIwOUwyOC40NTIuODQ0YTIuNDg4IDIuNDg4IDAgMCAxIDEuODQxLS43MjljLjM1MS4wMDkuNjk5LjA5MSAxLjAxOS4yNDVsOC4yMzYgMy45NjFhMi41IDIuNSAwIDAgMSAxLjQxNSAyLjI1M3YuMDk5LS4wNDVWMzMuMzd2LS4wNDUuMDk1YTIuNTAxIDIuNTAxIDAgMCAxLTEuNDE2IDIuMjU3bC04LjIzNSAzLjk2MWEyLjQ5MiAyLjQ5MiAwIDAgMS0xLjA3Ny4yNDZabS43MTYtMjguOTQ3LTExLjk0OCA5LjA2MiAxMS45NTIgOS4wNjUtLjAwNC0xOC4xMjdaIi8+PC9zdmc+)](https://vscode.stainless.com/mcp/%7B%22name%22%3A%22context.dev-mcp%22%2C%22type%22%3A%22http%22%2C%22url%22%3A%22https%3A%2F%2Fcontext-dev.stlmcp.com%22%2C%22headers%22%3A%7B%22x-context-dev-api-key%22%3A%22My%20API%20Key%22%7D%7D)
11
+ [![Add to Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=context-dev-mcp&config=eyJuYW1lIjoiY29udGV4dC1kZXYtbWNwIiwidHJhbnNwb3J0IjoiaHR0cCIsInVybCI6Imh0dHBzOi8vY29udGV4dC1kZXYuc3RsbWNwLmNvbSIsImhlYWRlcnMiOnsieC1jb250ZXh0LWRldi1hcGkta2V5IjoiTXkgQVBJIEtleSJ9fQ)
12
+ [![Install in VS Code](https://img.shields.io/badge/_-Add_to_VS_Code-blue?style=for-the-badge&logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGZpbGw9Im5vbmUiIHZpZXdCb3g9IjAgMCA0MCA0MCI+PHBhdGggZmlsbD0iI0VFRSIgZmlsbC1ydWxlPSJldmVub2RkIiBkPSJNMzAuMjM1IDM5Ljg4NGEyLjQ5MSAyLjQ5MSAwIDAgMS0xLjc4MS0uNzNMMTIuNyAyNC43OGwtMy40NiAyLjYyNC0zLjQwNiAyLjU4MmExLjY2NSAxLjY2NSAwIDAgMS0xLjA4Mi4zMzggMS42NjQgMS42NjQgMCAwIDEtMS4wNDYtLjQzMWwtMi4yLTJhMS42NjYgMS42NjYgMCAwIDEgMC0yLjQ2M0w3LjQ1OCAyMCA0LjY3IDE3LjQ1MyAxLjUwNyAxNC41N2ExLjY2NSAxLjY2NSAwIDAgMSAwLTIuNDYzbDIuMi0yYTEuNjY1IDEuNjY1IDAgMCAxIDIuMTMtLjA5N2w2Ljg2MyA1LjIwOUwyOC40NTIuODQ0YTIuNDg4IDIuNDg4IDAgMCAxIDEuODQxLS43MjljLjM1MS4wMDkuNjk5LjA5MSAxLjAxOS4yNDVsOC4yMzYgMy45NjFhMi41IDIuNSAwIDAgMSAxLjQxNSAyLjI1M3YuMDk5LS4wNDVWMzMuMzd2LS4wNDUuMDk1YTIuNTAxIDIuNTAxIDAgMCAxLTEuNDE2IDIuMjU3bC04LjIzNSAzLjk2MWEyLjQ5MiAyLjQ5MiAwIDAgMS0xLjA3Ny4yNDZabS43MTYtMjguOTQ3LTExLjk0OCA5LjA2MiAxMS45NTIgOS4wNjUtLjAwNC0xOC4xMjdaIi8+PC9zdmc+)](https://vscode.stainless.com/mcp/%7B%22name%22%3A%22context-dev-mcp%22%2C%22type%22%3A%22http%22%2C%22url%22%3A%22https%3A%2F%2Fcontext-dev.stlmcp.com%22%2C%22headers%22%3A%7B%22x-context-dev-api-key%22%3A%22My%20API%20Key%22%7D%7D)
13
13
 
14
14
  > Note: You may need to set environment variables in your MCP client.
15
15
 
@@ -26,7 +26,7 @@ To use this gem, install via Bundler by adding the following to your application
26
26
  <!-- x-release-please-start-version -->
27
27
 
28
28
  ```ruby
29
- gem "context.dev", "~> 1.17.0"
29
+ gem "context.dev", "~> 1.18.0"
30
30
  ```
31
31
 
32
32
  <!-- x-release-please-end -->
@@ -60,13 +60,12 @@ module ContextDev
60
60
  # @return [Integer, nil]
61
61
  optional :max_pages, Integer, api_name: :maxPages
62
62
 
63
- # @!attribute parse_pdf
64
- # When true (default), PDF pages are fetched and their text layer is extracted and
65
- # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
66
- # entirely (not included in results and not counted as failures).
63
+ # @!attribute pdf
64
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
65
+ # inclusive 1-based page range.
67
66
  #
68
- # @return [Boolean, nil]
69
- optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
67
+ # @return [ContextDev::Models::WebWebCrawlMdParams::Pdf, nil]
68
+ optional :pdf, -> { ContextDev::WebWebCrawlMdParams::Pdf }
70
69
 
71
70
  # @!attribute shorten_base64_images
72
71
  # Truncate base64-encoded image data in the Markdown output
@@ -74,6 +73,15 @@ module ContextDev
74
73
  # @return [Boolean, nil]
75
74
  optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images
76
75
 
76
+ # @!attribute stop_after_ms
77
+ # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
78
+ # checks the elapsed time and, if exceeded, returns the pages collected so far
79
+ # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
80
+ # min).
81
+ #
82
+ # @return [Integer, nil]
83
+ optional :stop_after_ms, Integer, api_name: :stopAfterMs
84
+
77
85
  # @!attribute timeout_ms
78
86
  # Optional timeout in milliseconds for the request. If the request takes longer
79
87
  # than this value, it will be aborted with a 408 status code. Maximum allowed
@@ -102,7 +110,7 @@ module ContextDev
102
110
  # @return [Integer, nil]
103
111
  optional :wait_for_ms, Integer, api_name: :waitForMs
104
112
 
105
- # @!method initialize(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
113
+ # @!method initialize(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, pdf: nil, shorten_base64_images: nil, stop_after_ms: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
106
114
  # Some parameter documentations has been truncated, see
107
115
  # {ContextDev::Models::WebWebCrawlMdParams} for more details.
108
116
  #
@@ -122,10 +130,12 @@ module ContextDev
122
130
  #
123
131
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
124
132
  #
125
- # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
133
+ # @param pdf [ContextDev::Models::WebWebCrawlMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
126
134
  #
127
135
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
128
136
  #
137
+ # @param stop_after_ms [Integer] Soft time budget for the crawl in milliseconds. After each scrape, the crawler c
138
+ #
129
139
  # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
130
140
  #
131
141
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -135,6 +145,41 @@ module ContextDev
135
145
  # @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load for each craw
136
146
  #
137
147
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
148
+
149
+ class Pdf < ContextDev::Internal::Type::BaseModel
150
+ # @!attribute end_
151
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
152
+ # Must be greater than or equal to start when both are provided.
153
+ #
154
+ # @return [Integer, nil]
155
+ optional :end_, Integer, api_name: :end
156
+
157
+ # @!attribute should_parse
158
+ # When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
159
+ # entirely (not included in results and not counted as failures).
160
+ #
161
+ # @return [Boolean, nil]
162
+ optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
163
+
164
+ # @!attribute start
165
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
166
+ #
167
+ # @return [Integer, nil]
168
+ optional :start, Integer
169
+
170
+ # @!method initialize(end_: nil, should_parse: nil, start: nil)
171
+ # Some parameter documentations has been truncated, see
172
+ # {ContextDev::Models::WebWebCrawlMdParams::Pdf} for more details.
173
+ #
174
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
175
+ # inclusive 1-based page range.
176
+ #
177
+ # @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
178
+ #
179
+ # @param should_parse [Boolean] When true, PDF pages are fetched and parsed. When false, PDF pages are skipped e
180
+ #
181
+ # @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
182
+ end
138
183
  end
139
184
  end
140
185
  end
@@ -34,7 +34,8 @@ module ContextDev
34
34
  required :num_failed, Integer, api_name: :numFailed
35
35
 
36
36
  # @!attribute num_skipped
37
- # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
37
+ # Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
38
+ # urlRegex)
38
39
  #
39
40
  # @return [Integer]
40
41
  required :num_skipped, Integer, api_name: :numSkipped
@@ -59,7 +60,7 @@ module ContextDev
59
60
  #
60
61
  # @param num_failed [Integer] Number of pages that failed to crawl
61
62
  #
62
- # @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
63
+ # @param num_skipped [Integer] Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching ur
63
64
  #
64
65
  # @param num_succeeded [Integer] Number of pages successfully crawled
65
66
  #
@@ -27,13 +27,12 @@ module ContextDev
27
27
  # @return [Integer, nil]
28
28
  optional :max_age_ms, Integer
29
29
 
30
- # @!attribute parse_pdf
31
- # When true (default), PDF URLs are fetched and their text layer is extracted and
32
- # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
33
- # and a 400 WEBSITE_ACCESS_ERROR is returned.
30
+ # @!attribute pdf
31
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
32
+ # inclusive 1-based page range.
34
33
  #
35
- # @return [Boolean, nil]
36
- optional :parse_pdf, ContextDev::Internal::Type::Boolean
34
+ # @return [ContextDev::Models::WebWebScrapeHTMLParams::Pdf, nil]
35
+ optional :pdf, -> { ContextDev::WebWebScrapeHTMLParams::Pdf }
37
36
 
38
37
  # @!attribute timeout_ms
39
38
  # Optional timeout in milliseconds for the request. If the request takes longer
@@ -50,7 +49,7 @@ module ContextDev
50
49
  # @return [Integer, nil]
51
50
  optional :wait_for_ms, Integer
52
51
 
53
- # @!method initialize(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
52
+ # @!method initialize(url:, include_frames: nil, max_age_ms: nil, pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
54
53
  # Some parameter documentations has been truncated, see
55
54
  # {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
56
55
  #
@@ -60,13 +59,48 @@ module ContextDev
60
59
  #
61
60
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
62
61
  #
63
- # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
62
+ # @param pdf [ContextDev::Models::WebWebScrapeHTMLParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
64
63
  #
65
64
  # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
66
65
  #
67
66
  # @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
68
67
  #
69
68
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
69
+
70
+ class Pdf < ContextDev::Internal::Type::BaseModel
71
+ # @!attribute end_
72
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
73
+ # Must be greater than or equal to start when both are provided.
74
+ #
75
+ # @return [Integer, nil]
76
+ optional :end_, Integer, api_name: :end
77
+
78
+ # @!attribute should_parse
79
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
80
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
81
+ #
82
+ # @return [Boolean, nil]
83
+ optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
84
+
85
+ # @!attribute start
86
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
87
+ #
88
+ # @return [Integer, nil]
89
+ optional :start, Integer
90
+
91
+ # @!method initialize(end_: nil, should_parse: nil, start: nil)
92
+ # Some parameter documentations has been truncated, see
93
+ # {ContextDev::Models::WebWebScrapeHTMLParams::Pdf} for more details.
94
+ #
95
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
96
+ # inclusive 1-based page range.
97
+ #
98
+ # @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
99
+ #
100
+ # @param should_parse [Boolean] When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
101
+ #
102
+ # @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
103
+ end
70
104
  end
71
105
  end
72
106
  end
@@ -40,13 +40,12 @@ module ContextDev
40
40
  # @return [Integer, nil]
41
41
  optional :max_age_ms, Integer
42
42
 
43
- # @!attribute parse_pdf
44
- # When true (default), PDF URLs are fetched and their text layer is extracted and
45
- # converted to Markdown. When false, PDF URLs are skipped and a 400
46
- # WEBSITE_ACCESS_ERROR is returned.
43
+ # @!attribute pdf
44
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
45
+ # inclusive 1-based page range.
47
46
  #
48
- # @return [Boolean, nil]
49
- optional :parse_pdf, ContextDev::Internal::Type::Boolean
47
+ # @return [ContextDev::Models::WebWebScrapeMdParams::Pdf, nil]
48
+ optional :pdf, -> { ContextDev::WebWebScrapeMdParams::Pdf }
50
49
 
51
50
  # @!attribute shorten_base64_images
52
51
  # Shorten base64-encoded image data in the Markdown output
@@ -76,7 +75,7 @@ module ContextDev
76
75
  # @return [Integer, nil]
77
76
  optional :wait_for_ms, Integer
78
77
 
79
- # @!method initialize(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
78
+ # @!method initialize(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
80
79
  # Some parameter documentations has been truncated, see
81
80
  # {ContextDev::Models::WebWebScrapeMdParams} for more details.
82
81
  #
@@ -90,7 +89,7 @@ module ContextDev
90
89
  #
91
90
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
92
91
  #
93
- # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
92
+ # @param pdf [ContextDev::Models::WebWebScrapeMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
94
93
  #
95
94
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
96
95
  #
@@ -101,6 +100,41 @@ module ContextDev
101
100
  # @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before conver
102
101
  #
103
102
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
103
+
104
+ class Pdf < ContextDev::Internal::Type::BaseModel
105
+ # @!attribute end_
106
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
107
+ # Must be greater than or equal to start when both are provided.
108
+ #
109
+ # @return [Integer, nil]
110
+ optional :end_, Integer, api_name: :end
111
+
112
+ # @!attribute should_parse
113
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
114
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
115
+ #
116
+ # @return [Boolean, nil]
117
+ optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
118
+
119
+ # @!attribute start
120
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
121
+ #
122
+ # @return [Integer, nil]
123
+ optional :start, Integer
124
+
125
+ # @!method initialize(end_: nil, should_parse: nil, start: nil)
126
+ # Some parameter documentations has been truncated, see
127
+ # {ContextDev::Models::WebWebScrapeMdParams::Pdf} for more details.
128
+ #
129
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
130
+ # inclusive 1-based page range.
131
+ #
132
+ # @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
133
+ #
134
+ # @param should_parse [Boolean] When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
135
+ #
136
+ # @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
137
+ end
104
138
  end
105
139
  end
106
140
  end
@@ -117,7 +117,7 @@ module ContextDev
117
117
  # Performs a crawl starting from a given URL, extracts page content as Markdown,
118
118
  # and returns results for all crawled pages.
119
119
  #
120
- # @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
120
+ # @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, pdf: nil, shorten_base64_images: nil, stop_after_ms: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
121
121
  #
122
122
  # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
123
123
  #
@@ -135,10 +135,12 @@ module ContextDev
135
135
  #
136
136
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
137
137
  #
138
- # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
138
+ # @param pdf [ContextDev::Models::WebWebCrawlMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
139
139
  #
140
140
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
141
141
  #
142
+ # @param stop_after_ms [Integer] Soft time budget for the crawl in milliseconds. After each scrape, the crawler c
143
+ #
142
144
  # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
143
145
  #
144
146
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -168,7 +170,7 @@ module ContextDev
168
170
  #
169
171
  # Scrapes the given URL and returns the raw HTML content of the page.
170
172
  #
171
- # @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
173
+ # @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
172
174
  #
173
175
  # @param url [String] Full URL to scrape (must include http:// or https:// protocol)
174
176
  #
@@ -176,7 +178,7 @@ module ContextDev
176
178
  #
177
179
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
178
180
  #
179
- # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
181
+ # @param pdf [ContextDev::Models::WebWebScrapeHTMLParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
180
182
  #
181
183
  # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
182
184
  #
@@ -196,7 +198,6 @@ module ContextDev
196
198
  query: query.transform_keys(
197
199
  include_frames: "includeFrames",
198
200
  max_age_ms: "maxAgeMs",
199
- parse_pdf: "parsePDF",
200
201
  timeout_ms: "timeoutMS",
201
202
  wait_for_ms: "waitForMs"
202
203
  ),
@@ -251,7 +252,7 @@ module ContextDev
251
252
  #
252
253
  # Scrapes the given URL into LLM usable Markdown.
253
254
  #
254
- # @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
255
+ # @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
255
256
  #
256
257
  # @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
257
258
  #
@@ -263,7 +264,7 @@ module ContextDev
263
264
  #
264
265
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
265
266
  #
266
- # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
267
+ # @param pdf [ContextDev::Models::WebWebScrapeMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
267
268
  #
268
269
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
269
270
  #
@@ -289,7 +290,6 @@ module ContextDev
289
290
  include_images: "includeImages",
290
291
  include_links: "includeLinks",
291
292
  max_age_ms: "maxAgeMs",
292
- parse_pdf: "parsePDF",
293
293
  shorten_base64_images: "shortenBase64Images",
294
294
  timeout_ms: "timeoutMS",
295
295
  use_main_content_only: "useMainContentOnly",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ContextDev
4
- VERSION = "1.17.0"
4
+ VERSION = "1.18.0"
5
5
  end
@@ -69,14 +69,13 @@ module ContextDev
69
69
  sig { params(max_pages: Integer).void }
70
70
  attr_writer :max_pages
71
71
 
72
- # When true (default), PDF pages are fetched and their text layer is extracted and
73
- # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
74
- # entirely (not included in results and not counted as failures).
75
- sig { returns(T.nilable(T::Boolean)) }
76
- attr_reader :parse_pdf
72
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
73
+ # inclusive 1-based page range.
74
+ sig { returns(T.nilable(ContextDev::WebWebCrawlMdParams::Pdf)) }
75
+ attr_reader :pdf
77
76
 
78
- sig { params(parse_pdf: T::Boolean).void }
79
- attr_writer :parse_pdf
77
+ sig { params(pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash).void }
78
+ attr_writer :pdf
80
79
 
81
80
  # Truncate base64-encoded image data in the Markdown output
82
81
  sig { returns(T.nilable(T::Boolean)) }
@@ -85,6 +84,16 @@ module ContextDev
85
84
  sig { params(shorten_base64_images: T::Boolean).void }
86
85
  attr_writer :shorten_base64_images
87
86
 
87
+ # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
88
+ # checks the elapsed time and, if exceeded, returns the pages collected so far
89
+ # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
90
+ # min).
91
+ sig { returns(T.nilable(Integer)) }
92
+ attr_reader :stop_after_ms
93
+
94
+ sig { params(stop_after_ms: Integer).void }
95
+ attr_writer :stop_after_ms
96
+
88
97
  # Optional timeout in milliseconds for the request. If the request takes longer
89
98
  # than this value, it will be aborted with a 408 status code. Maximum allowed
90
99
  # value is 300000ms (5 minutes).
@@ -127,8 +136,9 @@ module ContextDev
127
136
  max_age_ms: Integer,
128
137
  max_depth: Integer,
129
138
  max_pages: Integer,
130
- parse_pdf: T::Boolean,
139
+ pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash,
131
140
  shorten_base64_images: T::Boolean,
141
+ stop_after_ms: Integer,
132
142
  timeout_ms: Integer,
133
143
  url_regex: String,
134
144
  use_main_content_only: T::Boolean,
@@ -158,12 +168,16 @@ module ContextDev
158
168
  max_depth: nil,
159
169
  # Maximum number of pages to crawl. Hard cap: 500.
160
170
  max_pages: nil,
161
- # When true (default), PDF pages are fetched and their text layer is extracted and
162
- # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
163
- # entirely (not included in results and not counted as failures).
164
- parse_pdf: nil,
171
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
172
+ # inclusive 1-based page range.
173
+ pdf: nil,
165
174
  # Truncate base64-encoded image data in the Markdown output
166
175
  shorten_base64_images: nil,
176
+ # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
177
+ # checks the elapsed time and, if exceeded, returns the pages collected so far
178
+ # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
179
+ # min).
180
+ stop_after_ms: nil,
167
181
  # Optional timeout in milliseconds for the request. If the request takes longer
168
182
  # than this value, it will be aborted with a 408 status code. Maximum allowed
169
183
  # value is 300000ms (5 minutes).
@@ -191,8 +205,9 @@ module ContextDev
191
205
  max_age_ms: Integer,
192
206
  max_depth: Integer,
193
207
  max_pages: Integer,
194
- parse_pdf: T::Boolean,
208
+ pdf: ContextDev::WebWebCrawlMdParams::Pdf,
195
209
  shorten_base64_images: T::Boolean,
210
+ stop_after_ms: Integer,
196
211
  timeout_ms: Integer,
197
212
  url_regex: String,
198
213
  use_main_content_only: T::Boolean,
@@ -203,6 +218,68 @@ module ContextDev
203
218
  end
204
219
  def to_hash
205
220
  end
221
+
222
+ class Pdf < ContextDev::Internal::Type::BaseModel
223
+ OrHash =
224
+ T.type_alias do
225
+ T.any(
226
+ ContextDev::WebWebCrawlMdParams::Pdf,
227
+ ContextDev::Internal::AnyHash
228
+ )
229
+ end
230
+
231
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
232
+ # Must be greater than or equal to start when both are provided.
233
+ sig { returns(T.nilable(Integer)) }
234
+ attr_reader :end_
235
+
236
+ sig { params(end_: Integer).void }
237
+ attr_writer :end_
238
+
239
+ # When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
240
+ # entirely (not included in results and not counted as failures).
241
+ sig { returns(T.nilable(T::Boolean)) }
242
+ attr_reader :should_parse
243
+
244
+ sig { params(should_parse: T::Boolean).void }
245
+ attr_writer :should_parse
246
+
247
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
248
+ sig { returns(T.nilable(Integer)) }
249
+ attr_reader :start
250
+
251
+ sig { params(start: Integer).void }
252
+ attr_writer :start
253
+
254
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
255
+ # inclusive 1-based page range.
256
+ sig do
257
+ params(
258
+ end_: Integer,
259
+ should_parse: T::Boolean,
260
+ start: Integer
261
+ ).returns(T.attached_class)
262
+ end
263
+ def self.new(
264
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
265
+ # Must be greater than or equal to start when both are provided.
266
+ end_: nil,
267
+ # When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
268
+ # entirely (not included in results and not counted as failures).
269
+ should_parse: nil,
270
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
271
+ start: nil
272
+ )
273
+ end
274
+
275
+ sig do
276
+ override.returns(
277
+ { end_: Integer, should_parse: T::Boolean, start: Integer }
278
+ )
279
+ end
280
+ def to_hash
281
+ end
282
+ end
206
283
  end
207
284
  end
208
285
  end
@@ -64,7 +64,8 @@ module ContextDev
64
64
  sig { returns(Integer) }
65
65
  attr_accessor :num_failed
66
66
 
67
- # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
67
+ # Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
68
+ # urlRegex)
68
69
  sig { returns(Integer) }
69
70
  attr_accessor :num_skipped
70
71
 
@@ -90,7 +91,8 @@ module ContextDev
90
91
  max_crawl_depth:,
91
92
  # Number of pages that failed to crawl
92
93
  num_failed:,
93
- # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
94
+ # Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
95
+ # urlRegex)
94
96
  num_skipped:,
95
97
  # Number of pages successfully crawled
96
98
  num_succeeded:,
@@ -34,14 +34,13 @@ module ContextDev
34
34
  sig { params(max_age_ms: Integer).void }
35
35
  attr_writer :max_age_ms
36
36
 
37
- # When true (default), PDF URLs are fetched and their text layer is extracted and
38
- # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
39
- # and a 400 WEBSITE_ACCESS_ERROR is returned.
40
- sig { returns(T.nilable(T::Boolean)) }
41
- attr_reader :parse_pdf
37
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
38
+ # inclusive 1-based page range.
39
+ sig { returns(T.nilable(ContextDev::WebWebScrapeHTMLParams::Pdf)) }
40
+ attr_reader :pdf
42
41
 
43
- sig { params(parse_pdf: T::Boolean).void }
44
- attr_writer :parse_pdf
42
+ sig { params(pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash).void }
43
+ attr_writer :pdf
45
44
 
46
45
  # Optional timeout in milliseconds for the request. If the request takes longer
47
46
  # than this value, it will be aborted with a 408 status code. Maximum allowed
@@ -65,7 +64,7 @@ module ContextDev
65
64
  url: String,
66
65
  include_frames: T::Boolean,
67
66
  max_age_ms: Integer,
68
- parse_pdf: T::Boolean,
67
+ pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash,
69
68
  timeout_ms: Integer,
70
69
  wait_for_ms: Integer,
71
70
  request_options: ContextDev::RequestOptions::OrHash
@@ -80,10 +79,9 @@ module ContextDev
80
79
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
81
80
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
82
81
  max_age_ms: nil,
83
- # When true (default), PDF URLs are fetched and their text layer is extracted and
84
- # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
85
- # and a 400 WEBSITE_ACCESS_ERROR is returned.
86
- parse_pdf: nil,
82
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
83
+ # inclusive 1-based page range.
84
+ pdf: nil,
87
85
  # Optional timeout in milliseconds for the request. If the request takes longer
88
86
  # than this value, it will be aborted with a 408 status code. Maximum allowed
89
87
  # value is 300000ms (5 minutes).
@@ -101,7 +99,7 @@ module ContextDev
101
99
  url: String,
102
100
  include_frames: T::Boolean,
103
101
  max_age_ms: Integer,
104
- parse_pdf: T::Boolean,
102
+ pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
105
103
  timeout_ms: Integer,
106
104
  wait_for_ms: Integer,
107
105
  request_options: ContextDev::RequestOptions
@@ -110,6 +108,68 @@ module ContextDev
110
108
  end
111
109
  def to_hash
112
110
  end
111
+
112
+ class Pdf < ContextDev::Internal::Type::BaseModel
113
+ OrHash =
114
+ T.type_alias do
115
+ T.any(
116
+ ContextDev::WebWebScrapeHTMLParams::Pdf,
117
+ ContextDev::Internal::AnyHash
118
+ )
119
+ end
120
+
121
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
122
+ # Must be greater than or equal to start when both are provided.
123
+ sig { returns(T.nilable(Integer)) }
124
+ attr_reader :end_
125
+
126
+ sig { params(end_: Integer).void }
127
+ attr_writer :end_
128
+
129
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
130
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
131
+ sig { returns(T.nilable(T::Boolean)) }
132
+ attr_reader :should_parse
133
+
134
+ sig { params(should_parse: T::Boolean).void }
135
+ attr_writer :should_parse
136
+
137
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
138
+ sig { returns(T.nilable(Integer)) }
139
+ attr_reader :start
140
+
141
+ sig { params(start: Integer).void }
142
+ attr_writer :start
143
+
144
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
145
+ # inclusive 1-based page range.
146
+ sig do
147
+ params(
148
+ end_: Integer,
149
+ should_parse: T::Boolean,
150
+ start: Integer
151
+ ).returns(T.attached_class)
152
+ end
153
+ def self.new(
154
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
155
+ # Must be greater than or equal to start when both are provided.
156
+ end_: nil,
157
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
158
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
159
+ should_parse: nil,
160
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
161
+ start: nil
162
+ )
163
+ end
164
+
165
+ sig do
166
+ override.returns(
167
+ { end_: Integer, should_parse: T::Boolean, start: Integer }
168
+ )
169
+ end
170
+ def to_hash
171
+ end
172
+ end
113
173
  end
114
174
  end
115
175
  end
@@ -46,14 +46,13 @@ module ContextDev
46
46
  sig { params(max_age_ms: Integer).void }
47
47
  attr_writer :max_age_ms
48
48
 
49
- # When true (default), PDF URLs are fetched and their text layer is extracted and
50
- # converted to Markdown. When false, PDF URLs are skipped and a 400
51
- # WEBSITE_ACCESS_ERROR is returned.
52
- sig { returns(T.nilable(T::Boolean)) }
53
- attr_reader :parse_pdf
49
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
50
+ # inclusive 1-based page range.
51
+ sig { returns(T.nilable(ContextDev::WebWebScrapeMdParams::Pdf)) }
52
+ attr_reader :pdf
54
53
 
55
- sig { params(parse_pdf: T::Boolean).void }
56
- attr_writer :parse_pdf
54
+ sig { params(pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash).void }
55
+ attr_writer :pdf
57
56
 
58
57
  # Shorten base64-encoded image data in the Markdown output
59
58
  sig { returns(T.nilable(T::Boolean)) }
@@ -94,7 +93,7 @@ module ContextDev
94
93
  include_images: T::Boolean,
95
94
  include_links: T::Boolean,
96
95
  max_age_ms: Integer,
97
- parse_pdf: T::Boolean,
96
+ pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash,
98
97
  shorten_base64_images: T::Boolean,
99
98
  timeout_ms: Integer,
100
99
  use_main_content_only: T::Boolean,
@@ -116,10 +115,9 @@ module ContextDev
116
115
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
117
116
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
118
117
  max_age_ms: nil,
119
- # When true (default), PDF URLs are fetched and their text layer is extracted and
120
- # converted to Markdown. When false, PDF URLs are skipped and a 400
121
- # WEBSITE_ACCESS_ERROR is returned.
122
- parse_pdf: nil,
118
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
119
+ # inclusive 1-based page range.
120
+ pdf: nil,
123
121
  # Shorten base64-encoded image data in the Markdown output
124
122
  shorten_base64_images: nil,
125
123
  # Optional timeout in milliseconds for the request. If the request takes longer
@@ -144,7 +142,7 @@ module ContextDev
144
142
  include_images: T::Boolean,
145
143
  include_links: T::Boolean,
146
144
  max_age_ms: Integer,
147
- parse_pdf: T::Boolean,
145
+ pdf: ContextDev::WebWebScrapeMdParams::Pdf,
148
146
  shorten_base64_images: T::Boolean,
149
147
  timeout_ms: Integer,
150
148
  use_main_content_only: T::Boolean,
@@ -155,6 +153,68 @@ module ContextDev
155
153
  end
156
154
  def to_hash
157
155
  end
156
+
157
+ class Pdf < ContextDev::Internal::Type::BaseModel
158
+ OrHash =
159
+ T.type_alias do
160
+ T.any(
161
+ ContextDev::WebWebScrapeMdParams::Pdf,
162
+ ContextDev::Internal::AnyHash
163
+ )
164
+ end
165
+
166
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
167
+ # Must be greater than or equal to start when both are provided.
168
+ sig { returns(T.nilable(Integer)) }
169
+ attr_reader :end_
170
+
171
+ sig { params(end_: Integer).void }
172
+ attr_writer :end_
173
+
174
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
175
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
176
+ sig { returns(T.nilable(T::Boolean)) }
177
+ attr_reader :should_parse
178
+
179
+ sig { params(should_parse: T::Boolean).void }
180
+ attr_writer :should_parse
181
+
182
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
183
+ sig { returns(T.nilable(Integer)) }
184
+ attr_reader :start
185
+
186
+ sig { params(start: Integer).void }
187
+ attr_writer :start
188
+
189
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
190
+ # inclusive 1-based page range.
191
+ sig do
192
+ params(
193
+ end_: Integer,
194
+ should_parse: T::Boolean,
195
+ start: Integer
196
+ ).returns(T.attached_class)
197
+ end
198
+ def self.new(
199
+ # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
200
+ # Must be greater than or equal to start when both are provided.
201
+ end_: nil,
202
+ # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
203
+ # a 400 WEBSITE_ACCESS_ERROR is returned.
204
+ should_parse: nil,
205
+ # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
206
+ start: nil
207
+ )
208
+ end
209
+
210
+ sig do
211
+ override.returns(
212
+ { end_: Integer, should_parse: T::Boolean, start: Integer }
213
+ )
214
+ end
215
+ def to_hash
216
+ end
217
+ end
158
218
  end
159
219
  end
160
220
  end
@@ -122,8 +122,9 @@ module ContextDev
122
122
  max_age_ms: Integer,
123
123
  max_depth: Integer,
124
124
  max_pages: Integer,
125
- parse_pdf: T::Boolean,
125
+ pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash,
126
126
  shorten_base64_images: T::Boolean,
127
+ stop_after_ms: Integer,
127
128
  timeout_ms: Integer,
128
129
  url_regex: String,
129
130
  use_main_content_only: T::Boolean,
@@ -153,12 +154,16 @@ module ContextDev
153
154
  max_depth: nil,
154
155
  # Maximum number of pages to crawl. Hard cap: 500.
155
156
  max_pages: nil,
156
- # When true (default), PDF pages are fetched and their text layer is extracted and
157
- # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
158
- # entirely (not included in results and not counted as failures).
159
- parse_pdf: nil,
157
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
158
+ # inclusive 1-based page range.
159
+ pdf: nil,
160
160
  # Truncate base64-encoded image data in the Markdown output
161
161
  shorten_base64_images: nil,
162
+ # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
163
+ # checks the elapsed time and, if exceeded, returns the pages collected so far
164
+ # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
165
+ # min).
166
+ stop_after_ms: nil,
162
167
  # Optional timeout in milliseconds for the request. If the request takes longer
163
168
  # than this value, it will be aborted with a 408 status code. Maximum allowed
164
169
  # value is 300000ms (5 minutes).
@@ -181,7 +186,7 @@ module ContextDev
181
186
  url: String,
182
187
  include_frames: T::Boolean,
183
188
  max_age_ms: Integer,
184
- parse_pdf: T::Boolean,
189
+ pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash,
185
190
  timeout_ms: Integer,
186
191
  wait_for_ms: Integer,
187
192
  request_options: ContextDev::RequestOptions::OrHash
@@ -196,10 +201,9 @@ module ContextDev
196
201
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
197
202
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
198
203
  max_age_ms: nil,
199
- # When true (default), PDF URLs are fetched and their text layer is extracted and
200
- # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
201
- # and a 400 WEBSITE_ACCESS_ERROR is returned.
202
- parse_pdf: nil,
204
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
205
+ # inclusive 1-based page range.
206
+ pdf: nil,
203
207
  # Optional timeout in milliseconds for the request. If the request takes longer
204
208
  # than this value, it will be aborted with a 408 status code. Maximum allowed
205
209
  # value is 300000ms (5 minutes).
@@ -253,7 +257,7 @@ module ContextDev
253
257
  include_images: T::Boolean,
254
258
  include_links: T::Boolean,
255
259
  max_age_ms: Integer,
256
- parse_pdf: T::Boolean,
260
+ pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash,
257
261
  shorten_base64_images: T::Boolean,
258
262
  timeout_ms: Integer,
259
263
  use_main_content_only: T::Boolean,
@@ -275,10 +279,9 @@ module ContextDev
275
279
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
276
280
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
277
281
  max_age_ms: nil,
278
- # When true (default), PDF URLs are fetched and their text layer is extracted and
279
- # converted to Markdown. When false, PDF URLs are skipped and a 400
280
- # WEBSITE_ACCESS_ERROR is returned.
281
- parse_pdf: nil,
282
+ # PDF parsing controls. Use start/end to limit text extraction and OCR to an
283
+ # inclusive 1-based page range.
284
+ pdf: nil,
282
285
  # Shorten base64-encoded image data in the Markdown output
283
286
  shorten_base64_images: nil,
284
287
  # Optional timeout in milliseconds for the request. If the request takes longer
@@ -10,8 +10,9 @@ module ContextDev
10
10
  max_age_ms: Integer,
11
11
  max_depth: Integer,
12
12
  max_pages: Integer,
13
- parse_pdf: bool,
13
+ pdf: ContextDev::WebWebCrawlMdParams::Pdf,
14
14
  :shorten_base64_images => bool,
15
+ stop_after_ms: Integer,
15
16
  timeout_ms: Integer,
16
17
  url_regex: String,
17
18
  use_main_content_only: bool,
@@ -53,14 +54,20 @@ module ContextDev
53
54
 
54
55
  def max_pages=: (Integer) -> Integer
55
56
 
56
- attr_reader parse_pdf: bool?
57
+ attr_reader pdf: ContextDev::WebWebCrawlMdParams::Pdf?
57
58
 
58
- def parse_pdf=: (bool) -> bool
59
+ def pdf=: (
60
+ ContextDev::WebWebCrawlMdParams::Pdf
61
+ ) -> ContextDev::WebWebCrawlMdParams::Pdf
59
62
 
60
63
  attr_reader shorten_base64_images: bool?
61
64
 
62
65
  def shorten_base64_images=: (bool) -> bool
63
66
 
67
+ attr_reader stop_after_ms: Integer?
68
+
69
+ def stop_after_ms=: (Integer) -> Integer
70
+
64
71
  attr_reader timeout_ms: Integer?
65
72
 
66
73
  def timeout_ms=: (Integer) -> Integer
@@ -86,8 +93,9 @@ module ContextDev
86
93
  ?max_age_ms: Integer,
87
94
  ?max_depth: Integer,
88
95
  ?max_pages: Integer,
89
- ?parse_pdf: bool,
96
+ ?pdf: ContextDev::WebWebCrawlMdParams::Pdf,
90
97
  ?shorten_base64_images: bool,
98
+ ?stop_after_ms: Integer,
91
99
  ?timeout_ms: Integer,
92
100
  ?url_regex: String,
93
101
  ?use_main_content_only: bool,
@@ -104,14 +112,39 @@ module ContextDev
104
112
  max_age_ms: Integer,
105
113
  max_depth: Integer,
106
114
  max_pages: Integer,
107
- parse_pdf: bool,
115
+ pdf: ContextDev::WebWebCrawlMdParams::Pdf,
108
116
  :shorten_base64_images => bool,
117
+ stop_after_ms: Integer,
109
118
  timeout_ms: Integer,
110
119
  url_regex: String,
111
120
  use_main_content_only: bool,
112
121
  wait_for_ms: Integer,
113
122
  request_options: ContextDev::RequestOptions
114
123
  }
124
+
125
+ type pdf = { end_: Integer, should_parse: bool, start: Integer }
126
+
127
+ class Pdf < ContextDev::Internal::Type::BaseModel
128
+ attr_reader end_: Integer?
129
+
130
+ def end_=: (Integer) -> Integer
131
+
132
+ attr_reader should_parse: bool?
133
+
134
+ def should_parse=: (bool) -> bool
135
+
136
+ attr_reader start: Integer?
137
+
138
+ def start=: (Integer) -> Integer
139
+
140
+ def initialize: (
141
+ ?end_: Integer,
142
+ ?should_parse: bool,
143
+ ?start: Integer
144
+ ) -> void
145
+
146
+ def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
147
+ end
115
148
  end
116
149
  end
117
150
  end
@@ -5,7 +5,7 @@ module ContextDev
5
5
  url: String,
6
6
  include_frames: bool,
7
7
  max_age_ms: Integer,
8
- parse_pdf: bool,
8
+ pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
9
9
  timeout_ms: Integer,
10
10
  wait_for_ms: Integer
11
11
  }
@@ -25,9 +25,11 @@ module ContextDev
25
25
 
26
26
  def max_age_ms=: (Integer) -> Integer
27
27
 
28
- attr_reader parse_pdf: bool?
28
+ attr_reader pdf: ContextDev::WebWebScrapeHTMLParams::Pdf?
29
29
 
30
- def parse_pdf=: (bool) -> bool
30
+ def pdf=: (
31
+ ContextDev::WebWebScrapeHTMLParams::Pdf
32
+ ) -> ContextDev::WebWebScrapeHTMLParams::Pdf
31
33
 
32
34
  attr_reader timeout_ms: Integer?
33
35
 
@@ -41,7 +43,7 @@ module ContextDev
41
43
  url: String,
42
44
  ?include_frames: bool,
43
45
  ?max_age_ms: Integer,
44
- ?parse_pdf: bool,
46
+ ?pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
45
47
  ?timeout_ms: Integer,
46
48
  ?wait_for_ms: Integer,
47
49
  ?request_options: ContextDev::request_opts
@@ -51,11 +53,35 @@ module ContextDev
51
53
  url: String,
52
54
  include_frames: bool,
53
55
  max_age_ms: Integer,
54
- parse_pdf: bool,
56
+ pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
55
57
  timeout_ms: Integer,
56
58
  wait_for_ms: Integer,
57
59
  request_options: ContextDev::RequestOptions
58
60
  }
61
+
62
+ type pdf = { end_: Integer, should_parse: bool, start: Integer }
63
+
64
+ class Pdf < ContextDev::Internal::Type::BaseModel
65
+ attr_reader end_: Integer?
66
+
67
+ def end_=: (Integer) -> Integer
68
+
69
+ attr_reader should_parse: bool?
70
+
71
+ def should_parse=: (bool) -> bool
72
+
73
+ attr_reader start: Integer?
74
+
75
+ def start=: (Integer) -> Integer
76
+
77
+ def initialize: (
78
+ ?end_: Integer,
79
+ ?should_parse: bool,
80
+ ?start: Integer
81
+ ) -> void
82
+
83
+ def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
84
+ end
59
85
  end
60
86
  end
61
87
  end
@@ -7,7 +7,7 @@ module ContextDev
7
7
  include_images: bool,
8
8
  include_links: bool,
9
9
  max_age_ms: Integer,
10
- parse_pdf: bool,
10
+ pdf: ContextDev::WebWebScrapeMdParams::Pdf,
11
11
  :shorten_base64_images => bool,
12
12
  timeout_ms: Integer,
13
13
  use_main_content_only: bool,
@@ -37,9 +37,11 @@ module ContextDev
37
37
 
38
38
  def max_age_ms=: (Integer) -> Integer
39
39
 
40
- attr_reader parse_pdf: bool?
40
+ attr_reader pdf: ContextDev::WebWebScrapeMdParams::Pdf?
41
41
 
42
- def parse_pdf=: (bool) -> bool
42
+ def pdf=: (
43
+ ContextDev::WebWebScrapeMdParams::Pdf
44
+ ) -> ContextDev::WebWebScrapeMdParams::Pdf
43
45
 
44
46
  attr_reader shorten_base64_images: bool?
45
47
 
@@ -63,7 +65,7 @@ module ContextDev
63
65
  ?include_images: bool,
64
66
  ?include_links: bool,
65
67
  ?max_age_ms: Integer,
66
- ?parse_pdf: bool,
68
+ ?pdf: ContextDev::WebWebScrapeMdParams::Pdf,
67
69
  ?shorten_base64_images: bool,
68
70
  ?timeout_ms: Integer,
69
71
  ?use_main_content_only: bool,
@@ -77,13 +79,37 @@ module ContextDev
77
79
  include_images: bool,
78
80
  include_links: bool,
79
81
  max_age_ms: Integer,
80
- parse_pdf: bool,
82
+ pdf: ContextDev::WebWebScrapeMdParams::Pdf,
81
83
  :shorten_base64_images => bool,
82
84
  timeout_ms: Integer,
83
85
  use_main_content_only: bool,
84
86
  wait_for_ms: Integer,
85
87
  request_options: ContextDev::RequestOptions
86
88
  }
89
+
90
+ type pdf = { end_: Integer, should_parse: bool, start: Integer }
91
+
92
+ class Pdf < ContextDev::Internal::Type::BaseModel
93
+ attr_reader end_: Integer?
94
+
95
+ def end_=: (Integer) -> Integer
96
+
97
+ attr_reader should_parse: bool?
98
+
99
+ def should_parse=: (bool) -> bool
100
+
101
+ attr_reader start: Integer?
102
+
103
+ def start=: (Integer) -> Integer
104
+
105
+ def initialize: (
106
+ ?end_: Integer,
107
+ ?should_parse: bool,
108
+ ?start: Integer
109
+ ) -> void
110
+
111
+ def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
112
+ end
87
113
  end
88
114
  end
89
115
  end
@@ -36,8 +36,9 @@ module ContextDev
36
36
  ?max_age_ms: Integer,
37
37
  ?max_depth: Integer,
38
38
  ?max_pages: Integer,
39
- ?parse_pdf: bool,
39
+ ?pdf: ContextDev::WebWebCrawlMdParams::Pdf,
40
40
  ?shorten_base64_images: bool,
41
+ ?stop_after_ms: Integer,
41
42
  ?timeout_ms: Integer,
42
43
  ?url_regex: String,
43
44
  ?use_main_content_only: bool,
@@ -49,7 +50,7 @@ module ContextDev
49
50
  url: String,
50
51
  ?include_frames: bool,
51
52
  ?max_age_ms: Integer,
52
- ?parse_pdf: bool,
53
+ ?pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
53
54
  ?timeout_ms: Integer,
54
55
  ?wait_for_ms: Integer,
55
56
  ?request_options: ContextDev::request_opts
@@ -70,7 +71,7 @@ module ContextDev
70
71
  ?include_images: bool,
71
72
  ?include_links: bool,
72
73
  ?max_age_ms: Integer,
73
- ?parse_pdf: bool,
74
+ ?pdf: ContextDev::WebWebScrapeMdParams::Pdf,
74
75
  ?shorten_base64_images: bool,
75
76
  ?timeout_ms: Integer,
76
77
  ?use_main_content_only: bool,
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: context.dev
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.17.0
4
+ version: 1.18.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Context Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-05-09 00:00:00.000000000 Z
11
+ date: 2026-05-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cgi