RubyGems - context.dev - Versions diffs - 1.17.0 → 1.18.0 - Mend

context.dev 1.17.0 → 1.18.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (19)

checksums.yaml +4 -4
data/CHANGELOG.md +10 -0
data/README.md +3 -3
data/lib/context_dev/models/web_web_crawl_md_params.rb +53 -8
data/lib/context_dev/models/web_web_crawl_md_response.rb +3 -2
data/lib/context_dev/models/web_web_scrape_html_params.rb +42 -8
data/lib/context_dev/models/web_web_scrape_md_params.rb +42 -8
data/lib/context_dev/resources/web.rb +8 -8
data/lib/context_dev/version.rb +1 -1
data/rbi/context_dev/models/web_web_crawl_md_params.rbi +90 -13
data/rbi/context_dev/models/web_web_crawl_md_response.rbi +4 -2
data/rbi/context_dev/models/web_web_scrape_html_params.rbi +73 -13
data/rbi/context_dev/models/web_web_scrape_md_params.rbi +73 -13
data/rbi/context_dev/resources/web.rbi +18 -15
data/sig/context_dev/models/web_web_crawl_md_params.rbs +38 -5
data/sig/context_dev/models/web_web_scrape_html_params.rbs +31 -5
data/sig/context_dev/models/web_web_scrape_md_params.rbs +31 -5
data/sig/context_dev/resources/web.rbs +4 -3
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e47becb29d02dd213aee4b6b49429685c9d74c35f37d49c026b8bbabc87be191
-  data.tar.gz: 2e76f0c796bf46d9ec0a1f764de47841eabf2cb290c71b279a9740b574393d98
+  metadata.gz: 5a6ec86b2fdf43b0c3f9b800d8b93558a0c93fdf23fd72fad27a46ba43f2dccc
+  data.tar.gz: 8d5533fd69c08516b77b15b9bdcdfafea6cd18f4be1a77d17daa20bad2a6f9b1
 SHA512:
-  metadata.gz: 859d0da8ad2c2e9fd7d0bdb4b783b9e8c75f4323d3a599b42d28ad495342ff1187f6a502449b1a325090f3ccab3f76c1d855e25c5aaaee1e7e98d690e9ac44e4
-  data.tar.gz: 5b30272a7da7dbfeb75475e067f86bb404313ee842aa86b973817ccf359dc07c53fe06890b2e2dc34b689da07ec3b12a2b77a1f871eeaa0c549196f1afbc873d
+  metadata.gz: e98fe1516e060eb780f470e8c5ddc29ba4fdea44c4b11ca40d79d2dfacc7009eb11c95360c19dc0f2864857cc2560ce91569d3587cdaacc8009f757b76a293a8
+  data.tar.gz: 515ce97f5f873d21cef0665843408f507ad5ee2221ae9308d336617e57b2980d0c8add55c46193f2e10c580819cf09654213d32d6f15558f17c6adc87114ef32

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,15 @@
 # Changelog
+## 1.18.0 (2026-05-10)
+Full Changelog: [v1.17.0...v1.18.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.17.0...v1.18.0)
+### Features
+* **api:** api update ([b582c05](https://github.com/context-dot-dev/context-ruby-sdk/commit/b582c05376102bb0cb6f8d4d8c9a2cefdef8c1ec))
+* **api:** api update ([4a4e4bb](https://github.com/context-dot-dev/context-ruby-sdk/commit/4a4e4bbc547662de263a307b213dd7eecd03a61d))
+* **api:** manual updates ([ec963bb](https://github.com/context-dot-dev/context-ruby-sdk/commit/ec963bb99ac36d162552c76fb067e87144f21089))
 ## 1.17.0 (2026-05-09)
 Full Changelog: [v1.16.0...v1.17.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.16.0...v1.17.0)

data/README.md CHANGED Viewed

@@ -8,8 +8,8 @@ It is generated with [Stainless](https://www.stainless.com/).
 Use the Context Dev MCP Server to enable AI assistants to interact with this API, allowing them to explore endpoints, make test requests, and use documentation to help integrate this SDK into your application.
-[![Add to Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=context.dev-mcp&config=eyJuYW1lIjoiY29udGV4dC5kZXYtbWNwIiwidHJhbnNwb3J0IjoiaHR0cCIsInVybCI6Imh0dHBzOi8vY29udGV4dC1kZXYuc3RsbWNwLmNvbSIsImhlYWRlcnMiOnsieC1jb250ZXh0LWRldi1hcGkta2V5IjoiTXkgQVBJIEtleSJ9fQ)
-[![Install in VS Code](https://img.shields.io/badge/_-Add_to_VS_Code-blue?style=for-the-badge&logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGZpbGw9Im5vbmUiIHZpZXdCb3g9IjAgMCA0MCA0MCI+PHBhdGggZmlsbD0iI0VFRSIgZmlsbC1ydWxlPSJldmVub2RkIiBkPSJNMzAuMjM1IDM5Ljg4NGEyLjQ5MSAyLjQ5MSAwIDAgMS0xLjc4MS0uNzNMMTIuNyAyNC43OGwtMy40NiAyLjYyNC0zLjQwNiAyLjU4MmExLjY2NSAxLjY2NSAwIDAgMS0xLjA4Mi4zMzggMS42NjQgMS42NjQgMCAwIDEtMS4wNDYtLjQzMWwtMi4yLTJhMS42NjYgMS42NjYgMCAwIDEgMC0yLjQ2M0w3LjQ1OCAyMCA0LjY3IDE3LjQ1MyAxLjUwNyAxNC41N2ExLjY2NSAxLjY2NSAwIDAgMSAwLTIuNDYzbDIuMi0yYTEuNjY1IDEuNjY1IDAgMCAxIDIuMTMtLjA5N2w2Ljg2MyA1LjIwOUwyOC40NTIuODQ0YTIuNDg4IDIuNDg4IDAgMCAxIDEuODQxLS43MjljLjM1MS4wMDkuNjk5LjA5MSAxLjAxOS4yNDVsOC4yMzYgMy45NjFhMi41IDIuNSAwIDAgMSAxLjQxNSAyLjI1M3YuMDk5LS4wNDVWMzMuMzd2LS4wNDUuMDk1YTIuNTAxIDIuNTAxIDAgMCAxLTEuNDE2IDIuMjU3bC04LjIzNSAzLjk2MWEyLjQ5MiAyLjQ5MiAwIDAgMS0xLjA3Ny4yNDZabS43MTYtMjguOTQ3LTExLjk0OCA5LjA2MiAxMS45NTIgOS4wNjUtLjAwNC0xOC4xMjdaIi8+PC9zdmc+)](https://vscode.stainless.com/mcp/%7B%22name%22%3A%22context.dev-mcp%22%2C%22type%22%3A%22http%22%2C%22url%22%3A%22https%3A%2F%2Fcontext-dev.stlmcp.com%22%2C%22headers%22%3A%7B%22x-context-dev-api-key%22%3A%22My%20API%20Key%22%7D%7D)
+[![Add to Cursor](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=context-dev-mcp&config=eyJuYW1lIjoiY29udGV4dC1kZXYtbWNwIiwidHJhbnNwb3J0IjoiaHR0cCIsInVybCI6Imh0dHBzOi8vY29udGV4dC1kZXYuc3RsbWNwLmNvbSIsImhlYWRlcnMiOnsieC1jb250ZXh0LWRldi1hcGkta2V5IjoiTXkgQVBJIEtleSJ9fQ)
+[![Install in VS Code](https://img.shields.io/badge/_-Add_to_VS_Code-blue?style=for-the-badge&logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIGZpbGw9Im5vbmUiIHZpZXdCb3g9IjAgMCA0MCA0MCI+PHBhdGggZmlsbD0iI0VFRSIgZmlsbC1ydWxlPSJldmVub2RkIiBkPSJNMzAuMjM1IDM5Ljg4NGEyLjQ5MSAyLjQ5MSAwIDAgMS0xLjc4MS0uNzNMMTIuNyAyNC43OGwtMy40NiAyLjYyNC0zLjQwNiAyLjU4MmExLjY2NSAxLjY2NSAwIDAgMS0xLjA4Mi4zMzggMS42NjQgMS42NjQgMCAwIDEtMS4wNDYtLjQzMWwtMi4yLTJhMS42NjYgMS42NjYgMCAwIDEgMC0yLjQ2M0w3LjQ1OCAyMCA0LjY3IDE3LjQ1MyAxLjUwNyAxNC41N2ExLjY2NSAxLjY2NSAwIDAgMSAwLTIuNDYzbDIuMi0yYTEuNjY1IDEuNjY1IDAgMCAxIDIuMTMtLjA5N2w2Ljg2MyA1LjIwOUwyOC40NTIuODQ0YTIuNDg4IDIuNDg4IDAgMCAxIDEuODQxLS43MjljLjM1MS4wMDkuNjk5LjA5MSAxLjAxOS4yNDVsOC4yMzYgMy45NjFhMi41IDIuNSAwIDAgMSAxLjQxNSAyLjI1M3YuMDk5LS4wNDVWMzMuMzd2LS4wNDUuMDk1YTIuNTAxIDIuNTAxIDAgMCAxLTEuNDE2IDIuMjU3bC04LjIzNSAzLjk2MWEyLjQ5MiAyLjQ5MiAwIDAgMS0xLjA3Ny4yNDZabS43MTYtMjguOTQ3LTExLjk0OCA5LjA2MiAxMS45NTIgOS4wNjUtLjAwNC0xOC4xMjdaIi8+PC9zdmc+)](https://vscode.stainless.com/mcp/%7B%22name%22%3A%22context-dev-mcp%22%2C%22type%22%3A%22http%22%2C%22url%22%3A%22https%3A%2F%2Fcontext-dev.stlmcp.com%22%2C%22headers%22%3A%7B%22x-context-dev-api-key%22%3A%22My%20API%20Key%22%7D%7D)
 > Note: You may need to set environment variables in your MCP client.
@@ -26,7 +26,7 @@ To use this gem, install via Bundler by adding the following to your application
 <!-- x-release-please-start-version -->
 ```ruby
-gem "context.dev", "~> 1.17.0"
+gem "context.dev", "~> 1.18.0"
 ```
 <!-- x-release-please-end -->

data/lib/context_dev/models/web_web_crawl_md_params.rb CHANGED Viewed

@@ -60,13 +60,12 @@ module ContextDev
       #   @return [Integer, nil]
       optional :max_pages, Integer, api_name: :maxPages
-      # @!attribute parse_pdf
-      #   When true (default), PDF pages are fetched and their text layer is extracted and
-      #   converted to Markdown alongside HTML pages. When false, PDF pages are skipped
-      #   entirely (not included in results and not counted as failures).
+      # @!attribute pdf
+      #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      #   inclusive 1-based page range.
       #
-      #   @return [Boolean, nil]
-      optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
+      #   @return [ContextDev::Models::WebWebCrawlMdParams::Pdf, nil]
+      optional :pdf, -> { ContextDev::WebWebCrawlMdParams::Pdf }
       # @!attribute shorten_base64_images
       #   Truncate base64-encoded image data in the Markdown output
@@ -74,6 +73,15 @@ module ContextDev
       #   @return [Boolean, nil]
       optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images
+      # @!attribute stop_after_ms
+      #   Soft time budget for the crawl in milliseconds. After each scrape, the crawler
+      #   checks the elapsed time and, if exceeded, returns the pages collected so far
+      #   instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
+      #   min).
+      #
+      #   @return [Integer, nil]
+      optional :stop_after_ms, Integer, api_name: :stopAfterMs
       # @!attribute timeout_ms
       #   Optional timeout in milliseconds for the request. If the request takes longer
       #   than this value, it will be aborted with a 408 status code. Maximum allowed
@@ -102,7 +110,7 @@ module ContextDev
       #   @return [Integer, nil]
       optional :wait_for_ms, Integer, api_name: :waitForMs
-      # @!method initialize(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
+      # @!method initialize(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, pdf: nil, shorten_base64_images: nil, stop_after_ms: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
       #   Some parameter documentations has been truncated, see
       #   {ContextDev::Models::WebWebCrawlMdParams} for more details.
       #
@@ -122,10 +130,12 @@ module ContextDev
       #
       #   @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
       #
-      #   @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
+      #   @param pdf [ContextDev::Models::WebWebCrawlMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       #   @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
       #
+      #   @param stop_after_ms [Integer] Soft time budget for the crawl in milliseconds. After each scrape, the crawler c
+      #
       #   @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
       #
       #   @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -135,6 +145,41 @@ module ContextDev
       #   @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load for each craw
       #
       #   @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        # @!attribute end_
+        #   Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        #   Must be greater than or equal to start when both are provided.
+        #
+        #   @return [Integer, nil]
+        optional :end_, Integer, api_name: :end
+        # @!attribute should_parse
+        #   When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
+        #   entirely (not included in results and not counted as failures).
+        #
+        #   @return [Boolean, nil]
+        optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
+        # @!attribute start
+        #   First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        #
+        #   @return [Integer, nil]
+        optional :start, Integer
+        # @!method initialize(end_: nil, should_parse: nil, start: nil)
+        #   Some parameter documentations has been truncated, see
+        #   {ContextDev::Models::WebWebCrawlMdParams::Pdf} for more details.
+        #
+        #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        #   inclusive 1-based page range.
+        #
+        #   @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
+        #
+        #   @param should_parse [Boolean] When true, PDF pages are fetched and parsed. When false, PDF pages are skipped e
+        #
+        #   @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+      end
     end
   end
 end

data/lib/context_dev/models/web_web_crawl_md_response.rb CHANGED Viewed

@@ -34,7 +34,8 @@ module ContextDev
         required :num_failed, Integer, api_name: :numFailed
         # @!attribute num_skipped
-        #   Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
+        #   Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
+        #   urlRegex)
         #
         #   @return [Integer]
         required :num_skipped, Integer, api_name: :numSkipped
@@ -59,7 +60,7 @@ module ContextDev
         #
         #   @param num_failed [Integer] Number of pages that failed to crawl
         #
-        #   @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
+        #   @param num_skipped [Integer] Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching ur
         #
         #   @param num_succeeded [Integer] Number of pages successfully crawled
         #

data/lib/context_dev/models/web_web_scrape_html_params.rb CHANGED Viewed

@@ -27,13 +27,12 @@ module ContextDev
       #   @return [Integer, nil]
       optional :max_age_ms, Integer
-      # @!attribute parse_pdf
-      #   When true (default), PDF URLs are fetched and their text layer is extracted and
-      #   returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
-      #   and a 400 WEBSITE_ACCESS_ERROR is returned.
+      # @!attribute pdf
+      #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      #   inclusive 1-based page range.
       #
-      #   @return [Boolean, nil]
-      optional :parse_pdf, ContextDev::Internal::Type::Boolean
+      #   @return [ContextDev::Models::WebWebScrapeHTMLParams::Pdf, nil]
+      optional :pdf, -> { ContextDev::WebWebScrapeHTMLParams::Pdf }
       # @!attribute timeout_ms
       #   Optional timeout in milliseconds for the request. If the request takes longer
@@ -50,7 +49,7 @@ module ContextDev
       #   @return [Integer, nil]
       optional :wait_for_ms, Integer
-      # @!method initialize(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
+      # @!method initialize(url:, include_frames: nil, max_age_ms: nil, pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
       #   Some parameter documentations has been truncated, see
       #   {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
       #
@@ -60,13 +59,48 @@ module ContextDev
       #
       #   @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
       #
-      #   @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
+      #   @param pdf [ContextDev::Models::WebWebScrapeHTMLParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       #   @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
       #
       #   @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
       #
       #   @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        # @!attribute end_
+        #   Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        #   Must be greater than or equal to start when both are provided.
+        #
+        #   @return [Integer, nil]
+        optional :end_, Integer, api_name: :end
+        # @!attribute should_parse
+        #   When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        #   a 400 WEBSITE_ACCESS_ERROR is returned.
+        #
+        #   @return [Boolean, nil]
+        optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
+        # @!attribute start
+        #   First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        #
+        #   @return [Integer, nil]
+        optional :start, Integer
+        # @!method initialize(end_: nil, should_parse: nil, start: nil)
+        #   Some parameter documentations has been truncated, see
+        #   {ContextDev::Models::WebWebScrapeHTMLParams::Pdf} for more details.
+        #
+        #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        #   inclusive 1-based page range.
+        #
+        #   @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
+        #
+        #   @param should_parse [Boolean] When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        #
+        #   @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+      end
     end
   end
 end

data/lib/context_dev/models/web_web_scrape_md_params.rb CHANGED Viewed

@@ -40,13 +40,12 @@ module ContextDev
       #   @return [Integer, nil]
       optional :max_age_ms, Integer
-      # @!attribute parse_pdf
-      #   When true (default), PDF URLs are fetched and their text layer is extracted and
-      #   converted to Markdown. When false, PDF URLs are skipped and a 400
-      #   WEBSITE_ACCESS_ERROR is returned.
+      # @!attribute pdf
+      #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      #   inclusive 1-based page range.
       #
-      #   @return [Boolean, nil]
-      optional :parse_pdf, ContextDev::Internal::Type::Boolean
+      #   @return [ContextDev::Models::WebWebScrapeMdParams::Pdf, nil]
+      optional :pdf, -> { ContextDev::WebWebScrapeMdParams::Pdf }
       # @!attribute shorten_base64_images
       #   Shorten base64-encoded image data in the Markdown output
@@ -76,7 +75,7 @@ module ContextDev
       #   @return [Integer, nil]
       optional :wait_for_ms, Integer
-      # @!method initialize(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
+      # @!method initialize(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
       #   Some parameter documentations has been truncated, see
       #   {ContextDev::Models::WebWebScrapeMdParams} for more details.
       #
@@ -90,7 +89,7 @@ module ContextDev
       #
       #   @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
       #
-      #   @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
+      #   @param pdf [ContextDev::Models::WebWebScrapeMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       #   @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
       #
@@ -101,6 +100,41 @@ module ContextDev
       #   @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before conver
       #
       #   @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        # @!attribute end_
+        #   Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        #   Must be greater than or equal to start when both are provided.
+        #
+        #   @return [Integer, nil]
+        optional :end_, Integer, api_name: :end
+        # @!attribute should_parse
+        #   When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        #   a 400 WEBSITE_ACCESS_ERROR is returned.
+        #
+        #   @return [Boolean, nil]
+        optional :should_parse, ContextDev::Internal::Type::Boolean, api_name: :shouldParse
+        # @!attribute start
+        #   First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        #
+        #   @return [Integer, nil]
+        optional :start, Integer
+        # @!method initialize(end_: nil, should_parse: nil, start: nil)
+        #   Some parameter documentations has been truncated, see
+        #   {ContextDev::Models::WebWebScrapeMdParams::Pdf} for more details.
+        #
+        #   PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        #   inclusive 1-based page range.
+        #
+        #   @param end_ [Integer] Last 1-based PDF page to parse. When omitted, parsing ends at the last page. Mus
+        #
+        #   @param should_parse [Boolean] When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        #
+        #   @param start [Integer] First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+      end
     end
   end
 end

data/lib/context_dev/resources/web.rb CHANGED Viewed

@@ -117,7 +117,7 @@ module ContextDev
       # Performs a crawl starting from a given URL, extracts page content as Markdown,
       # and returns results for all crawled pages.
       #
-      # @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
+      # @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, pdf: nil, shorten_base64_images: nil, stop_after_ms: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
       #
       # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
       #
@@ -135,10 +135,12 @@ module ContextDev
       #
       # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
       #
-      # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
+      # @param pdf [ContextDev::Models::WebWebCrawlMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
       #
+      # @param stop_after_ms [Integer] Soft time budget for the crawl in milliseconds. After each scrape, the crawler c
+      #
       # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
       #
       # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -168,7 +170,7 @@ module ContextDev
       #
       # Scrapes the given URL and returns the raw HTML content of the page.
       #
-      # @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
+      # @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
       #
       # @param url [String] Full URL to scrape (must include http:// or https:// protocol)
       #
@@ -176,7 +178,7 @@ module ContextDev
       #
       # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
       #
-      # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
+      # @param pdf [ContextDev::Models::WebWebScrapeHTMLParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       # @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
       #
@@ -196,7 +198,6 @@ module ContextDev
           query: query.transform_keys(
             include_frames: "includeFrames",
             max_age_ms: "maxAgeMs",
-            parse_pdf: "parsePDF",
             timeout_ms: "timeoutMS",
             wait_for_ms: "waitForMs"
           ),
@@ -251,7 +252,7 @@ module ContextDev
       #
       # Scrapes the given URL into LLM usable Markdown.
       #
-      # @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
+      # @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
       #
       # @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
       #
@@ -263,7 +264,7 @@ module ContextDev
       #
       # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
       #
-      # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
+      # @param pdf [ContextDev::Models::WebWebScrapeMdParams::Pdf] PDF parsing controls. Use start/end to limit text extraction and OCR to an inclu
       #
       # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
       #
@@ -289,7 +290,6 @@ module ContextDev
             include_images: "includeImages",
             include_links: "includeLinks",
             max_age_ms: "maxAgeMs",
-            parse_pdf: "parsePDF",
             shorten_base64_images: "shortenBase64Images",
             timeout_ms: "timeoutMS",
             use_main_content_only: "useMainContentOnly",

data/lib/context_dev/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module ContextDev
-  VERSION = "1.17.0"
+  VERSION = "1.18.0"
 end

data/rbi/context_dev/models/web_web_crawl_md_params.rbi CHANGED Viewed

@@ -69,14 +69,13 @@ module ContextDev
       sig { params(max_pages: Integer).void }
       attr_writer :max_pages
-      # When true (default), PDF pages are fetched and their text layer is extracted and
-      # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
-      # entirely (not included in results and not counted as failures).
-      sig { returns(T.nilable(T::Boolean)) }
-      attr_reader :parse_pdf
+      # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      # inclusive 1-based page range.
+      sig { returns(T.nilable(ContextDev::WebWebCrawlMdParams::Pdf)) }
+      attr_reader :pdf
-      sig { params(parse_pdf: T::Boolean).void }
-      attr_writer :parse_pdf
+      sig { params(pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash).void }
+      attr_writer :pdf
       # Truncate base64-encoded image data in the Markdown output
       sig { returns(T.nilable(T::Boolean)) }
@@ -85,6 +84,16 @@ module ContextDev
       sig { params(shorten_base64_images: T::Boolean).void }
       attr_writer :shorten_base64_images
+      # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
+      # checks the elapsed time and, if exceeded, returns the pages collected so far
+      # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
+      # min).
+      sig { returns(T.nilable(Integer)) }
+      attr_reader :stop_after_ms
+      sig { params(stop_after_ms: Integer).void }
+      attr_writer :stop_after_ms
       # Optional timeout in milliseconds for the request. If the request takes longer
       # than this value, it will be aborted with a 408 status code. Maximum allowed
       # value is 300000ms (5 minutes).
@@ -127,8 +136,9 @@ module ContextDev
           max_age_ms: Integer,
           max_depth: Integer,
           max_pages: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash,
           shorten_base64_images: T::Boolean,
+          stop_after_ms: Integer,
           timeout_ms: Integer,
           url_regex: String,
           use_main_content_only: T::Boolean,
@@ -158,12 +168,16 @@ module ContextDev
         max_depth: nil,
         # Maximum number of pages to crawl. Hard cap: 500.
         max_pages: nil,
-        # When true (default), PDF pages are fetched and their text layer is extracted and
-        # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
-        # entirely (not included in results and not counted as failures).
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Truncate base64-encoded image data in the Markdown output
         shorten_base64_images: nil,
+        # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
+        # checks the elapsed time and, if exceeded, returns the pages collected so far
+        # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
+        # min).
+        stop_after_ms: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer
         # than this value, it will be aborted with a 408 status code. Maximum allowed
         # value is 300000ms (5 minutes).
@@ -191,8 +205,9 @@ module ContextDev
             max_age_ms: Integer,
             max_depth: Integer,
             max_pages: Integer,
-            parse_pdf: T::Boolean,
+            pdf: ContextDev::WebWebCrawlMdParams::Pdf,
             shorten_base64_images: T::Boolean,
+            stop_after_ms: Integer,
             timeout_ms: Integer,
             url_regex: String,
             use_main_content_only: T::Boolean,
@@ -203,6 +218,68 @@ module ContextDev
       end
       def to_hash
       end
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              ContextDev::WebWebCrawlMdParams::Pdf,
+              ContextDev::Internal::AnyHash
+            )
+          end
+        # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        # Must be greater than or equal to start when both are provided.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :end_
+        sig { params(end_: Integer).void }
+        attr_writer :end_
+        # When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
+        # entirely (not included in results and not counted as failures).
+        sig { returns(T.nilable(T::Boolean)) }
+        attr_reader :should_parse
+        sig { params(should_parse: T::Boolean).void }
+        attr_writer :should_parse
+        # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :start
+        sig { params(start: Integer).void }
+        attr_writer :start
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        sig do
+          params(
+            end_: Integer,
+            should_parse: T::Boolean,
+            start: Integer
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+          # Must be greater than or equal to start when both are provided.
+          end_: nil,
+          # When true, PDF pages are fetched and parsed. When false, PDF pages are skipped
+          # entirely (not included in results and not counted as failures).
+          should_parse: nil,
+          # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+          start: nil
+        )
+        end
+        sig do
+          override.returns(
+            { end_: Integer, should_parse: T::Boolean, start: Integer }
+          )
+        end
+        def to_hash
+        end
+      end
     end
   end
 end

data/rbi/context_dev/models/web_web_crawl_md_response.rbi CHANGED Viewed

@@ -64,7 +64,8 @@ module ContextDev
         sig { returns(Integer) }
         attr_accessor :num_failed
-        # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
+        # Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
+        # urlRegex)
         sig { returns(Integer) }
         attr_accessor :num_skipped
@@ -90,7 +91,8 @@ module ContextDev
           max_crawl_depth:,
           # Number of pages that failed to crawl
           num_failed:,
-          # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
+          # Number of URLs skipped (PDFs when pdf.shouldParse=false, or URLs not matching
+          # urlRegex)
           num_skipped:,
           # Number of pages successfully crawled
           num_succeeded:,

data/rbi/context_dev/models/web_web_scrape_html_params.rbi CHANGED Viewed

@@ -34,14 +34,13 @@ module ContextDev
       sig { params(max_age_ms: Integer).void }
       attr_writer :max_age_ms
-      # When true (default), PDF URLs are fetched and their text layer is extracted and
-      # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
-      # and a 400 WEBSITE_ACCESS_ERROR is returned.
-      sig { returns(T.nilable(T::Boolean)) }
-      attr_reader :parse_pdf
+      # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      # inclusive 1-based page range.
+      sig { returns(T.nilable(ContextDev::WebWebScrapeHTMLParams::Pdf)) }
+      attr_reader :pdf
-      sig { params(parse_pdf: T::Boolean).void }
-      attr_writer :parse_pdf
+      sig { params(pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash).void }
+      attr_writer :pdf
       # Optional timeout in milliseconds for the request. If the request takes longer
       # than this value, it will be aborted with a 408 status code. Maximum allowed
@@ -65,7 +64,7 @@ module ContextDev
           url: String,
           include_frames: T::Boolean,
           max_age_ms: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash,
           timeout_ms: Integer,
           wait_for_ms: Integer,
           request_options: ContextDev::RequestOptions::OrHash
@@ -80,10 +79,9 @@ module ContextDev
         # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
         # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
         max_age_ms: nil,
-        # When true (default), PDF URLs are fetched and their text layer is extracted and
-        # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
-        # and a 400 WEBSITE_ACCESS_ERROR is returned.
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer
         # than this value, it will be aborted with a 408 status code. Maximum allowed
         # value is 300000ms (5 minutes).
@@ -101,7 +99,7 @@ module ContextDev
             url: String,
             include_frames: T::Boolean,
             max_age_ms: Integer,
-            parse_pdf: T::Boolean,
+            pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
             timeout_ms: Integer,
             wait_for_ms: Integer,
             request_options: ContextDev::RequestOptions
@@ -110,6 +108,68 @@ module ContextDev
       end
       def to_hash
       end
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              ContextDev::WebWebScrapeHTMLParams::Pdf,
+              ContextDev::Internal::AnyHash
+            )
+          end
+        # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        # Must be greater than or equal to start when both are provided.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :end_
+        sig { params(end_: Integer).void }
+        attr_writer :end_
+        # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        # a 400 WEBSITE_ACCESS_ERROR is returned.
+        sig { returns(T.nilable(T::Boolean)) }
+        attr_reader :should_parse
+        sig { params(should_parse: T::Boolean).void }
+        attr_writer :should_parse
+        # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :start
+        sig { params(start: Integer).void }
+        attr_writer :start
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        sig do
+          params(
+            end_: Integer,
+            should_parse: T::Boolean,
+            start: Integer
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+          # Must be greater than or equal to start when both are provided.
+          end_: nil,
+          # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+          # a 400 WEBSITE_ACCESS_ERROR is returned.
+          should_parse: nil,
+          # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+          start: nil
+        )
+        end
+        sig do
+          override.returns(
+            { end_: Integer, should_parse: T::Boolean, start: Integer }
+          )
+        end
+        def to_hash
+        end
+      end
     end
   end
 end

data/rbi/context_dev/models/web_web_scrape_md_params.rbi CHANGED Viewed

@@ -46,14 +46,13 @@ module ContextDev
       sig { params(max_age_ms: Integer).void }
       attr_writer :max_age_ms
-      # When true (default), PDF URLs are fetched and their text layer is extracted and
-      # converted to Markdown. When false, PDF URLs are skipped and a 400
-      # WEBSITE_ACCESS_ERROR is returned.
-      sig { returns(T.nilable(T::Boolean)) }
-      attr_reader :parse_pdf
+      # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+      # inclusive 1-based page range.
+      sig { returns(T.nilable(ContextDev::WebWebScrapeMdParams::Pdf)) }
+      attr_reader :pdf
-      sig { params(parse_pdf: T::Boolean).void }
-      attr_writer :parse_pdf
+      sig { params(pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash).void }
+      attr_writer :pdf
       # Shorten base64-encoded image data in the Markdown output
       sig { returns(T.nilable(T::Boolean)) }
@@ -94,7 +93,7 @@ module ContextDev
           include_images: T::Boolean,
           include_links: T::Boolean,
           max_age_ms: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash,
           shorten_base64_images: T::Boolean,
           timeout_ms: Integer,
           use_main_content_only: T::Boolean,
@@ -116,10 +115,9 @@ module ContextDev
         # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
         # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
         max_age_ms: nil,
-        # When true (default), PDF URLs are fetched and their text layer is extracted and
-        # converted to Markdown. When false, PDF URLs are skipped and a 400
-        # WEBSITE_ACCESS_ERROR is returned.
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Shorten base64-encoded image data in the Markdown output
         shorten_base64_images: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer
@@ -144,7 +142,7 @@ module ContextDev
             include_images: T::Boolean,
             include_links: T::Boolean,
             max_age_ms: Integer,
-            parse_pdf: T::Boolean,
+            pdf: ContextDev::WebWebScrapeMdParams::Pdf,
             shorten_base64_images: T::Boolean,
             timeout_ms: Integer,
             use_main_content_only: T::Boolean,
@@ -155,6 +153,68 @@ module ContextDev
       end
       def to_hash
       end
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        OrHash =
+          T.type_alias do
+            T.any(
+              ContextDev::WebWebScrapeMdParams::Pdf,
+              ContextDev::Internal::AnyHash
+            )
+          end
+        # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+        # Must be greater than or equal to start when both are provided.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :end_
+        sig { params(end_: Integer).void }
+        attr_writer :end_
+        # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+        # a 400 WEBSITE_ACCESS_ERROR is returned.
+        sig { returns(T.nilable(T::Boolean)) }
+        attr_reader :should_parse
+        sig { params(should_parse: T::Boolean).void }
+        attr_writer :should_parse
+        # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+        sig { returns(T.nilable(Integer)) }
+        attr_reader :start
+        sig { params(start: Integer).void }
+        attr_writer :start
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        sig do
+          params(
+            end_: Integer,
+            should_parse: T::Boolean,
+            start: Integer
+          ).returns(T.attached_class)
+        end
+        def self.new(
+          # Last 1-based PDF page to parse. When omitted, parsing ends at the last page.
+          # Must be greater than or equal to start when both are provided.
+          end_: nil,
+          # When true, PDF URLs are fetched and parsed. When false, PDF URLs are skipped and
+          # a 400 WEBSITE_ACCESS_ERROR is returned.
+          should_parse: nil,
+          # First 1-based PDF page to parse. When omitted, parsing starts at the first page.
+          start: nil
+        )
+        end
+        sig do
+          override.returns(
+            { end_: Integer, should_parse: T::Boolean, start: Integer }
+          )
+        end
+        def to_hash
+        end
+      end
     end
   end
 end

data/rbi/context_dev/resources/web.rbi CHANGED Viewed

@@ -122,8 +122,9 @@ module ContextDev
           max_age_ms: Integer,
           max_depth: Integer,
           max_pages: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebCrawlMdParams::Pdf::OrHash,
           shorten_base64_images: T::Boolean,
+          stop_after_ms: Integer,
           timeout_ms: Integer,
           url_regex: String,
           use_main_content_only: T::Boolean,
@@ -153,12 +154,16 @@ module ContextDev
         max_depth: nil,
         # Maximum number of pages to crawl. Hard cap: 500.
         max_pages: nil,
-        # When true (default), PDF pages are fetched and their text layer is extracted and
-        # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
-        # entirely (not included in results and not counted as failures).
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Truncate base64-encoded image data in the Markdown output
         shorten_base64_images: nil,
+        # Soft time budget for the crawl in milliseconds. After each scrape, the crawler
+        # checks the elapsed time and, if exceeded, returns the pages collected so far
+        # instead of continuing. Min: 10000 (10s). Max: 240000 (4 min). Default: 120000 (2
+        # min).
+        stop_after_ms: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer
         # than this value, it will be aborted with a 408 status code. Maximum allowed
         # value is 300000ms (5 minutes).
@@ -181,7 +186,7 @@ module ContextDev
           url: String,
           include_frames: T::Boolean,
           max_age_ms: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebScrapeHTMLParams::Pdf::OrHash,
           timeout_ms: Integer,
           wait_for_ms: Integer,
           request_options: ContextDev::RequestOptions::OrHash
@@ -196,10 +201,9 @@ module ContextDev
         # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
         # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
         max_age_ms: nil,
-        # When true (default), PDF URLs are fetched and their text layer is extracted and
-        # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
-        # and a 400 WEBSITE_ACCESS_ERROR is returned.
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer
         # than this value, it will be aborted with a 408 status code. Maximum allowed
         # value is 300000ms (5 minutes).
@@ -253,7 +257,7 @@ module ContextDev
           include_images: T::Boolean,
           include_links: T::Boolean,
           max_age_ms: Integer,
-          parse_pdf: T::Boolean,
+          pdf: ContextDev::WebWebScrapeMdParams::Pdf::OrHash,
           shorten_base64_images: T::Boolean,
           timeout_ms: Integer,
           use_main_content_only: T::Boolean,
@@ -275,10 +279,9 @@ module ContextDev
         # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
         # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
         max_age_ms: nil,
-        # When true (default), PDF URLs are fetched and their text layer is extracted and
-        # converted to Markdown. When false, PDF URLs are skipped and a 400
-        # WEBSITE_ACCESS_ERROR is returned.
-        parse_pdf: nil,
+        # PDF parsing controls. Use start/end to limit text extraction and OCR to an
+        # inclusive 1-based page range.
+        pdf: nil,
         # Shorten base64-encoded image data in the Markdown output
         shorten_base64_images: nil,
         # Optional timeout in milliseconds for the request. If the request takes longer

data/sig/context_dev/models/web_web_crawl_md_params.rbs CHANGED Viewed

@@ -10,8 +10,9 @@ module ContextDev
         max_age_ms: Integer,
         max_depth: Integer,
         max_pages: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebCrawlMdParams::Pdf,
         :shorten_base64_images => bool,
+        stop_after_ms: Integer,
         timeout_ms: Integer,
         url_regex: String,
         use_main_content_only: bool,
@@ -53,14 +54,20 @@ module ContextDev
       def max_pages=: (Integer) -> Integer
-      attr_reader parse_pdf: bool?
+      attr_reader pdf: ContextDev::WebWebCrawlMdParams::Pdf?
-      def parse_pdf=: (bool) -> bool
+      def pdf=: (
+        ContextDev::WebWebCrawlMdParams::Pdf
+      ) -> ContextDev::WebWebCrawlMdParams::Pdf
       attr_reader shorten_base64_images: bool?
       def shorten_base64_images=: (bool) -> bool
+      attr_reader stop_after_ms: Integer?
+      def stop_after_ms=: (Integer) -> Integer
       attr_reader timeout_ms: Integer?
       def timeout_ms=: (Integer) -> Integer
@@ -86,8 +93,9 @@ module ContextDev
         ?max_age_ms: Integer,
         ?max_depth: Integer,
         ?max_pages: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebCrawlMdParams::Pdf,
         ?shorten_base64_images: bool,
+        ?stop_after_ms: Integer,
         ?timeout_ms: Integer,
         ?url_regex: String,
         ?use_main_content_only: bool,
@@ -104,14 +112,39 @@ module ContextDev
         max_age_ms: Integer,
         max_depth: Integer,
         max_pages: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebCrawlMdParams::Pdf,
         :shorten_base64_images => bool,
+        stop_after_ms: Integer,
         timeout_ms: Integer,
         url_regex: String,
         use_main_content_only: bool,
         wait_for_ms: Integer,
         request_options: ContextDev::RequestOptions
       }
+      type pdf = { end_: Integer, should_parse: bool, start: Integer }
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        attr_reader end_: Integer?
+        def end_=: (Integer) -> Integer
+        attr_reader should_parse: bool?
+        def should_parse=: (bool) -> bool
+        attr_reader start: Integer?
+        def start=: (Integer) -> Integer
+        def initialize: (
+          ?end_: Integer,
+          ?should_parse: bool,
+          ?start: Integer
+        ) -> void
+        def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
+      end
     end
   end
 end

data/sig/context_dev/models/web_web_scrape_html_params.rbs CHANGED Viewed

@@ -5,7 +5,7 @@ module ContextDev
         url: String,
         include_frames: bool,
         max_age_ms: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
         timeout_ms: Integer,
         wait_for_ms: Integer
       }
@@ -25,9 +25,11 @@ module ContextDev
       def max_age_ms=: (Integer) -> Integer
-      attr_reader parse_pdf: bool?
+      attr_reader pdf: ContextDev::WebWebScrapeHTMLParams::Pdf?
-      def parse_pdf=: (bool) -> bool
+      def pdf=: (
+        ContextDev::WebWebScrapeHTMLParams::Pdf
+      ) -> ContextDev::WebWebScrapeHTMLParams::Pdf
       attr_reader timeout_ms: Integer?
@@ -41,7 +43,7 @@ module ContextDev
         url: String,
         ?include_frames: bool,
         ?max_age_ms: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
         ?timeout_ms: Integer,
         ?wait_for_ms: Integer,
         ?request_options: ContextDev::request_opts
@@ -51,11 +53,35 @@ module ContextDev
         url: String,
         include_frames: bool,
         max_age_ms: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
         timeout_ms: Integer,
         wait_for_ms: Integer,
         request_options: ContextDev::RequestOptions
       }
+      type pdf = { end_: Integer, should_parse: bool, start: Integer }
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        attr_reader end_: Integer?
+        def end_=: (Integer) -> Integer
+        attr_reader should_parse: bool?
+        def should_parse=: (bool) -> bool
+        attr_reader start: Integer?
+        def start=: (Integer) -> Integer
+        def initialize: (
+          ?end_: Integer,
+          ?should_parse: bool,
+          ?start: Integer
+        ) -> void
+        def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
+      end
     end
   end
 end

data/sig/context_dev/models/web_web_scrape_md_params.rbs CHANGED Viewed

@@ -7,7 +7,7 @@ module ContextDev
         include_images: bool,
         include_links: bool,
         max_age_ms: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebScrapeMdParams::Pdf,
         :shorten_base64_images => bool,
         timeout_ms: Integer,
         use_main_content_only: bool,
@@ -37,9 +37,11 @@ module ContextDev
       def max_age_ms=: (Integer) -> Integer
-      attr_reader parse_pdf: bool?
+      attr_reader pdf: ContextDev::WebWebScrapeMdParams::Pdf?
-      def parse_pdf=: (bool) -> bool
+      def pdf=: (
+        ContextDev::WebWebScrapeMdParams::Pdf
+      ) -> ContextDev::WebWebScrapeMdParams::Pdf
       attr_reader shorten_base64_images: bool?
@@ -63,7 +65,7 @@ module ContextDev
         ?include_images: bool,
         ?include_links: bool,
         ?max_age_ms: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebScrapeMdParams::Pdf,
         ?shorten_base64_images: bool,
         ?timeout_ms: Integer,
         ?use_main_content_only: bool,
@@ -77,13 +79,37 @@ module ContextDev
         include_images: bool,
         include_links: bool,
         max_age_ms: Integer,
-        parse_pdf: bool,
+        pdf: ContextDev::WebWebScrapeMdParams::Pdf,
         :shorten_base64_images => bool,
         timeout_ms: Integer,
         use_main_content_only: bool,
         wait_for_ms: Integer,
         request_options: ContextDev::RequestOptions
       }
+      type pdf = { end_: Integer, should_parse: bool, start: Integer }
+      class Pdf < ContextDev::Internal::Type::BaseModel
+        attr_reader end_: Integer?
+        def end_=: (Integer) -> Integer
+        attr_reader should_parse: bool?
+        def should_parse=: (bool) -> bool
+        attr_reader start: Integer?
+        def start=: (Integer) -> Integer
+        def initialize: (
+          ?end_: Integer,
+          ?should_parse: bool,
+          ?start: Integer
+        ) -> void
+        def to_hash: -> { end_: Integer, should_parse: bool, start: Integer }
+      end
     end
   end
 end

data/sig/context_dev/resources/web.rbs CHANGED Viewed

@@ -36,8 +36,9 @@ module ContextDev
         ?max_age_ms: Integer,
         ?max_depth: Integer,
         ?max_pages: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebCrawlMdParams::Pdf,
         ?shorten_base64_images: bool,
+        ?stop_after_ms: Integer,
         ?timeout_ms: Integer,
         ?url_regex: String,
         ?use_main_content_only: bool,
@@ -49,7 +50,7 @@ module ContextDev
         url: String,
         ?include_frames: bool,
         ?max_age_ms: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebScrapeHTMLParams::Pdf,
         ?timeout_ms: Integer,
         ?wait_for_ms: Integer,
         ?request_options: ContextDev::request_opts
@@ -70,7 +71,7 @@ module ContextDev
         ?include_images: bool,
         ?include_links: bool,
         ?max_age_ms: Integer,
-        ?parse_pdf: bool,
+        ?pdf: ContextDev::WebWebScrapeMdParams::Pdf,
         ?shorten_base64_images: bool,
         ?timeout_ms: Integer,
         ?use_main_content_only: bool,

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: context.dev
 version: !ruby/object:Gem::Version
-  version: 1.17.0
+  version: 1.18.0
 platform: ruby
 authors:
 - Context Dev
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-05-09 00:00:00.000000000 Z
+date: 2026-05-10 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: cgi