context.dev 1.6.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0e9ffccdbada64211d1e7ddc15ce648d6e4ec77637768a30f71d754d9a8ff788
4
- data.tar.gz: a95c5065d4f22c3a1269026fd93b936086316cb9a1a9e8de5af8e7c6021f0bcc
3
+ metadata.gz: 968dddadfbf1199743ee57bc82a47fd2d7b882da416170b13f41bf0574828d5d
4
+ data.tar.gz: 5e35c5601c635fdc8182a81e5738418f4e4d879231cbaea89b1722ba5f28db00
5
5
  SHA512:
6
- metadata.gz: 1bf660423aa6381d2301614c3987aedcf0edf8bec72bd2eca8b1ee88d4190cb5a4f8c8dd453783632ad1ed79da87bddbfadf09341c870947cba5784d08600610
7
- data.tar.gz: aa6851f96a3b0295cbc4386ea424b6e4f856889aac74bcd21e8ad7fd6735f5822ae5ed68306891ef3779dfdf1b590fcc32ffb8f069bd78599f8eacb4c6b5b882
6
+ metadata.gz: 9ffe4d5dde7376e41afd9607af63fcecd7ab97d8abf26a2adceef1a30594ec54dd10356c731c54dd97ebad44e00eb2d24132ebe27c25a95acfd8a568578557a9
7
+ data.tar.gz: 2fc80f2bc42e75a0dc03f4c3692f45e6eadce167280695690d69bc408c87eb9504fb557cd661c33041bff9c59876a6f47bbd8ba360b092ab762e70221b79e3ff
data/CHANGELOG.md CHANGED
@@ -1,5 +1,22 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.8.0 (2026-04-24)
4
+
5
+ Full Changelog: [v1.7.0...v1.8.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.7.0...v1.8.0)
6
+
7
+ ### Features
8
+
9
+ * **api:** api update ([84bfef2](https://github.com/context-dot-dev/context-ruby-sdk/commit/84bfef260d99a7a444c243edc4be1c72191929dd))
10
+ * **api:** api update ([5405b96](https://github.com/context-dot-dev/context-ruby-sdk/commit/5405b9676800e2004d92dd9f621dfc56972bacd2))
11
+
12
+ ## 1.7.0 (2026-04-24)
13
+
14
+ Full Changelog: [v1.6.0...v1.7.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.6.0...v1.7.0)
15
+
16
+ ### Features
17
+
18
+ * **api:** api update ([9c8b4d9](https://github.com/context-dot-dev/context-ruby-sdk/commit/9c8b4d9b1d813c5b7992998a3fa23cde63fe2f4c))
19
+
3
20
  ## 1.6.0 (2026-04-23)
4
21
 
5
22
  Full Changelog: [v1.5.0...v1.6.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.5.0...v1.6.0)
data/README.md CHANGED
@@ -26,7 +26,7 @@ To use this gem, install via Bundler by adding the following to your application
26
26
  <!-- x-release-please-start-version -->
27
27
 
28
28
  ```ruby
29
- gem "context.dev", "~> 1.6.0"
29
+ gem "context.dev", "~> 1.8.0"
30
30
  ```
31
31
 
32
32
  <!-- x-release-please-end -->
@@ -70,6 +70,12 @@ module ContextDev
70
70
  # @return [String]
71
71
  required :name, String
72
72
 
73
+ # @!attribute sku
74
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
75
+ #
76
+ # @return [String, nil]
77
+ required :sku, String, nil?: true
78
+
73
79
  # @!attribute tags
74
80
  # Tags associated with the product
75
81
  #
@@ -128,7 +134,7 @@ module ContextDev
128
134
  # @return [String, nil]
129
135
  optional :url, String, nil?: true
130
136
 
131
- # @!method initialize(description:, features:, images:, name:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
137
+ # @!method initialize(description:, features:, images:, name:, sku:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
132
138
  # The extracted product data, or null if not a product page
133
139
  #
134
140
  # @param description [String] Description of the product
@@ -139,6 +145,8 @@ module ContextDev
139
145
  #
140
146
  # @param name [String] Name of the product
141
147
  #
148
+ # @param sku [String, nil] Stock Keeping Unit (product identifier). Null if no identifier is found.
149
+ #
142
150
  # @param tags [Array<String>] Tags associated with the product
143
151
  #
144
152
  # @param target_audience [Array<String>] Target audience for the product (array of strings)
@@ -39,6 +39,12 @@ module ContextDev
39
39
  # @return [String]
40
40
  required :name, String
41
41
 
42
+ # @!attribute sku
43
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
44
+ #
45
+ # @return [String, nil]
46
+ required :sku, String, nil?: true
47
+
42
48
  # @!attribute tags
43
49
  # Tags associated with the product
44
50
  #
@@ -97,7 +103,7 @@ module ContextDev
97
103
  # @return [String, nil]
98
104
  optional :url, String, nil?: true
99
105
 
100
- # @!method initialize(description:, features:, images:, name:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
106
+ # @!method initialize(description:, features:, images:, name:, sku:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
101
107
  # @param description [String] Description of the product
102
108
  #
103
109
  # @param features [Array<String>] List of product features
@@ -106,6 +112,8 @@ module ContextDev
106
112
  #
107
113
  # @param name [String] Name of the product
108
114
  #
115
+ # @param sku [String, nil] Stock Keeping Unit (product identifier). Null if no identifier is found.
116
+ #
109
117
  # @param tags [Array<String>] Tags associated with the product
110
118
  #
111
119
  # @param target_audience [Array<String>] Target audience for the product (array of strings)
@@ -53,6 +53,14 @@ module ContextDev
53
53
  # @return [Integer, nil]
54
54
  optional :max_pages, Integer, api_name: :maxPages
55
55
 
56
+ # @!attribute parse_pdf
57
+ # When true (default), PDF pages are fetched and their text layer is extracted and
58
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
59
+ # entirely (not included in results and not counted as failures).
60
+ #
61
+ # @return [Boolean, nil]
62
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
63
+
56
64
  # @!attribute shorten_base64_images
57
65
  # Truncate base64-encoded image data in the Markdown output
58
66
  #
@@ -72,7 +80,7 @@ module ContextDev
72
80
  # @return [Boolean, nil]
73
81
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
74
82
 
75
- # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
83
+ # @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
76
84
  # Some parameter documentations has been truncated, see
77
85
  # {ContextDev::Models::WebWebCrawlMdParams} for more details.
78
86
  #
@@ -90,6 +98,8 @@ module ContextDev
90
98
  #
91
99
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
92
100
  #
101
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
102
+ #
93
103
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
94
104
  #
95
105
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -33,6 +33,12 @@ module ContextDev
33
33
  # @return [Integer]
34
34
  required :num_failed, Integer, api_name: :numFailed
35
35
 
36
+ # @!attribute num_skipped
37
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
38
+ #
39
+ # @return [Integer]
40
+ required :num_skipped, Integer, api_name: :numSkipped
41
+
36
42
  # @!attribute num_succeeded
37
43
  # Number of pages successfully crawled
38
44
  #
@@ -45,11 +51,16 @@ module ContextDev
45
51
  # @return [Integer]
46
52
  required :num_urls, Integer, api_name: :numUrls
47
53
 
48
- # @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
54
+ # @!method initialize(max_crawl_depth:, num_failed:, num_skipped:, num_succeeded:, num_urls:)
55
+ # Some parameter documentations has been truncated, see
56
+ # {ContextDev::Models::WebWebCrawlMdResponse::Metadata} for more details.
57
+ #
49
58
  # @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
50
59
  #
51
60
  # @param num_failed [Integer] Number of pages that failed to crawl
52
61
  #
62
+ # @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
63
+ #
53
64
  # @param num_succeeded [Integer] Number of pages successfully crawled
54
65
  #
55
66
  # @param num_urls [Integer] Total number of URLs crawled
@@ -21,7 +21,15 @@ module ContextDev
21
21
  # @return [Integer, nil]
22
22
  optional :max_age_ms, Integer
23
23
 
24
- # @!method initialize(url:, max_age_ms: nil, request_options: {})
24
+ # @!attribute parse_pdf
25
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
26
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
27
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
28
+ #
29
+ # @return [Boolean, nil]
30
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
31
+
32
+ # @!method initialize(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
25
33
  # Some parameter documentations has been truncated, see
26
34
  # {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
27
35
  #
@@ -29,6 +37,8 @@ module ContextDev
29
37
  #
30
38
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
31
39
  #
40
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
41
+ #
32
42
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
33
43
  end
34
44
  end
@@ -34,6 +34,14 @@ module ContextDev
34
34
  # @return [Integer, nil]
35
35
  optional :max_age_ms, Integer
36
36
 
37
+ # @!attribute parse_pdf
38
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
39
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
40
+ # WEBSITE_ACCESS_ERROR is returned.
41
+ #
42
+ # @return [Boolean, nil]
43
+ optional :parse_pdf, ContextDev::Internal::Type::Boolean
44
+
37
45
  # @!attribute shorten_base64_images
38
46
  # Shorten base64-encoded image data in the Markdown output
39
47
  #
@@ -47,7 +55,7 @@ module ContextDev
47
55
  # @return [Boolean, nil]
48
56
  optional :use_main_content_only, ContextDev::Internal::Type::Boolean
49
57
 
50
- # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
58
+ # @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
51
59
  # Some parameter documentations has been truncated, see
52
60
  # {ContextDev::Models::WebWebScrapeMdParams} for more details.
53
61
  #
@@ -59,6 +67,8 @@ module ContextDev
59
67
  #
60
68
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
61
69
  #
70
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
71
+ #
62
72
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
63
73
  #
64
74
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -105,7 +105,7 @@ module ContextDev
105
105
  # Performs a crawl starting from a given URL, extracts page content as Markdown,
106
106
  # and returns results for all crawled pages.
107
107
  #
108
- # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
108
+ # @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
109
109
  #
110
110
  # @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
111
111
  #
@@ -121,6 +121,8 @@ module ContextDev
121
121
  #
122
122
  # @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
123
123
  #
124
+ # @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
125
+ #
124
126
  # @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
125
127
  #
126
128
  # @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -148,12 +150,14 @@ module ContextDev
148
150
  #
149
151
  # Scrapes the given URL and returns the raw HTML content of the page.
150
152
  #
151
- # @overload web_scrape_html(url:, max_age_ms: nil, request_options: {})
153
+ # @overload web_scrape_html(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
152
154
  #
153
155
  # @param url [String] Full URL to scrape (must include http:// or https:// protocol)
154
156
  #
155
157
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
156
158
  #
159
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
160
+ #
157
161
  # @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
158
162
  #
159
163
  # @return [ContextDev::Models::WebWebScrapeHTMLResponse]
@@ -165,7 +169,7 @@ module ContextDev
165
169
  @client.request(
166
170
  method: :get,
167
171
  path: "web/scrape/html",
168
- query: query.transform_keys(max_age_ms: "maxAgeMs"),
172
+ query: query.transform_keys(max_age_ms: "maxAgeMs", parse_pdf: "parsePDF"),
169
173
  model: ContextDev::Models::WebWebScrapeHTMLResponse,
170
174
  options: options
171
175
  )
@@ -201,7 +205,7 @@ module ContextDev
201
205
  #
202
206
  # Scrapes the given URL into LLM usable Markdown.
203
207
  #
204
- # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
208
+ # @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
205
209
  #
206
210
  # @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
207
211
  #
@@ -211,6 +215,8 @@ module ContextDev
211
215
  #
212
216
  # @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
213
217
  #
218
+ # @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
219
+ #
214
220
  # @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
215
221
  #
216
222
  # @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -230,6 +236,7 @@ module ContextDev
230
236
  include_images: "includeImages",
231
237
  include_links: "includeLinks",
232
238
  max_age_ms: "maxAgeMs",
239
+ parse_pdf: "parsePDF",
233
240
  shorten_base64_images: "shortenBase64Images",
234
241
  use_main_content_only: "useMainContentOnly"
235
242
  ),
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ContextDev
4
- VERSION = "1.6.0"
4
+ VERSION = "1.8.0"
5
5
  end
@@ -155,6 +155,10 @@ module ContextDev
155
155
  sig { returns(String) }
156
156
  attr_accessor :name
157
157
 
158
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
159
+ sig { returns(T.nilable(String)) }
160
+ attr_accessor :sku
161
+
158
162
  # Tags associated with the product
159
163
  sig { returns(T::Array[String]) }
160
164
  attr_accessor :tags
@@ -210,6 +214,7 @@ module ContextDev
210
214
  features: T::Array[String],
211
215
  images: T::Array[String],
212
216
  name: String,
217
+ sku: T.nilable(String),
213
218
  tags: T::Array[String],
214
219
  target_audience: T::Array[String],
215
220
  billing_frequency:
@@ -236,6 +241,8 @@ module ContextDev
236
241
  images:,
237
242
  # Name of the product
238
243
  name:,
244
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
245
+ sku:,
239
246
  # Tags associated with the product
240
247
  tags:,
241
248
  # Target audience for the product (array of strings)
@@ -264,6 +271,7 @@ module ContextDev
264
271
  features: T::Array[String],
265
272
  images: T::Array[String],
266
273
  name: String,
274
+ sku: T.nilable(String),
267
275
  tags: T::Array[String],
268
276
  target_audience: T::Array[String],
269
277
  billing_frequency:
@@ -81,6 +81,10 @@ module ContextDev
81
81
  sig { returns(String) }
82
82
  attr_accessor :name
83
83
 
84
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
85
+ sig { returns(T.nilable(String)) }
86
+ attr_accessor :sku
87
+
84
88
  # Tags associated with the product
85
89
  sig { returns(T::Array[String]) }
86
90
  attr_accessor :tags
@@ -135,6 +139,7 @@ module ContextDev
135
139
  features: T::Array[String],
136
140
  images: T::Array[String],
137
141
  name: String,
142
+ sku: T.nilable(String),
138
143
  tags: T::Array[String],
139
144
  target_audience: T::Array[String],
140
145
  billing_frequency:
@@ -161,6 +166,8 @@ module ContextDev
161
166
  images:,
162
167
  # Name of the product
163
168
  name:,
169
+ # Stock Keeping Unit (product identifier). Null if no identifier is found.
170
+ sku:,
164
171
  # Tags associated with the product
165
172
  tags:,
166
173
  # Target audience for the product (array of strings)
@@ -189,6 +196,7 @@ module ContextDev
189
196
  features: T::Array[String],
190
197
  images: T::Array[String],
191
198
  name: String,
199
+ sku: T.nilable(String),
192
200
  tags: T::Array[String],
193
201
  target_audience: T::Array[String],
194
202
  billing_frequency:
@@ -61,6 +61,15 @@ module ContextDev
61
61
  sig { params(max_pages: Integer).void }
62
62
  attr_writer :max_pages
63
63
 
64
+ # When true (default), PDF pages are fetched and their text layer is extracted and
65
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
66
+ # entirely (not included in results and not counted as failures).
67
+ sig { returns(T.nilable(T::Boolean)) }
68
+ attr_reader :parse_pdf
69
+
70
+ sig { params(parse_pdf: T::Boolean).void }
71
+ attr_writer :parse_pdf
72
+
64
73
  # Truncate base64-encoded image data in the Markdown output
65
74
  sig { returns(T.nilable(T::Boolean)) }
66
75
  attr_reader :shorten_base64_images
@@ -92,6 +101,7 @@ module ContextDev
92
101
  max_age_ms: Integer,
93
102
  max_depth: Integer,
94
103
  max_pages: Integer,
104
+ parse_pdf: T::Boolean,
95
105
  shorten_base64_images: T::Boolean,
96
106
  url_regex: String,
97
107
  use_main_content_only: T::Boolean,
@@ -117,6 +127,10 @@ module ContextDev
117
127
  max_depth: nil,
118
128
  # Maximum number of pages to crawl. Hard cap: 500.
119
129
  max_pages: nil,
130
+ # When true (default), PDF pages are fetched and their text layer is extracted and
131
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
132
+ # entirely (not included in results and not counted as failures).
133
+ parse_pdf: nil,
120
134
  # Truncate base64-encoded image data in the Markdown output
121
135
  shorten_base64_images: nil,
122
136
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -138,6 +152,7 @@ module ContextDev
138
152
  max_age_ms: Integer,
139
153
  max_depth: Integer,
140
154
  max_pages: Integer,
155
+ parse_pdf: T::Boolean,
141
156
  shorten_base64_images: T::Boolean,
142
157
  url_regex: String,
143
158
  use_main_content_only: T::Boolean,
@@ -64,6 +64,10 @@ module ContextDev
64
64
  sig { returns(Integer) }
65
65
  attr_accessor :num_failed
66
66
 
67
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
68
+ sig { returns(Integer) }
69
+ attr_accessor :num_skipped
70
+
67
71
  # Number of pages successfully crawled
68
72
  sig { returns(Integer) }
69
73
  attr_accessor :num_succeeded
@@ -76,6 +80,7 @@ module ContextDev
76
80
  params(
77
81
  max_crawl_depth: Integer,
78
82
  num_failed: Integer,
83
+ num_skipped: Integer,
79
84
  num_succeeded: Integer,
80
85
  num_urls: Integer
81
86
  ).returns(T.attached_class)
@@ -85,6 +90,8 @@ module ContextDev
85
90
  max_crawl_depth:,
86
91
  # Number of pages that failed to crawl
87
92
  num_failed:,
93
+ # Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
94
+ num_skipped:,
88
95
  # Number of pages successfully crawled
89
96
  num_succeeded:,
90
97
  # Total number of URLs crawled
@@ -97,6 +104,7 @@ module ContextDev
97
104
  {
98
105
  max_crawl_depth: Integer,
99
106
  num_failed: Integer,
107
+ num_skipped: Integer,
100
108
  num_succeeded: Integer,
101
109
  num_urls: Integer
102
110
  }
@@ -27,10 +27,20 @@ module ContextDev
27
27
  sig { params(max_age_ms: Integer).void }
28
28
  attr_writer :max_age_ms
29
29
 
30
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
31
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
32
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
33
+ sig { returns(T.nilable(T::Boolean)) }
34
+ attr_reader :parse_pdf
35
+
36
+ sig { params(parse_pdf: T::Boolean).void }
37
+ attr_writer :parse_pdf
38
+
30
39
  sig do
31
40
  params(
32
41
  url: String,
33
42
  max_age_ms: Integer,
43
+ parse_pdf: T::Boolean,
34
44
  request_options: ContextDev::RequestOptions::OrHash
35
45
  ).returns(T.attached_class)
36
46
  end
@@ -41,6 +51,10 @@ module ContextDev
41
51
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
42
52
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
43
53
  max_age_ms: nil,
54
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
55
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
56
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
57
+ parse_pdf: nil,
44
58
  request_options: {}
45
59
  )
46
60
  end
@@ -50,6 +64,7 @@ module ContextDev
50
64
  {
51
65
  url: String,
52
66
  max_age_ms: Integer,
67
+ parse_pdf: T::Boolean,
53
68
  request_options: ContextDev::RequestOptions
54
69
  }
55
70
  )
@@ -39,6 +39,15 @@ module ContextDev
39
39
  sig { params(max_age_ms: Integer).void }
40
40
  attr_writer :max_age_ms
41
41
 
42
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
43
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
44
+ # WEBSITE_ACCESS_ERROR is returned.
45
+ sig { returns(T.nilable(T::Boolean)) }
46
+ attr_reader :parse_pdf
47
+
48
+ sig { params(parse_pdf: T::Boolean).void }
49
+ attr_writer :parse_pdf
50
+
42
51
  # Shorten base64-encoded image data in the Markdown output
43
52
  sig { returns(T.nilable(T::Boolean)) }
44
53
  attr_reader :shorten_base64_images
@@ -60,6 +69,7 @@ module ContextDev
60
69
  include_images: T::Boolean,
61
70
  include_links: T::Boolean,
62
71
  max_age_ms: Integer,
72
+ parse_pdf: T::Boolean,
63
73
  shorten_base64_images: T::Boolean,
64
74
  use_main_content_only: T::Boolean,
65
75
  request_options: ContextDev::RequestOptions::OrHash
@@ -77,6 +87,10 @@ module ContextDev
77
87
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
78
88
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
79
89
  max_age_ms: nil,
90
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
91
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
92
+ # WEBSITE_ACCESS_ERROR is returned.
93
+ parse_pdf: nil,
80
94
  # Shorten base64-encoded image data in the Markdown output
81
95
  shorten_base64_images: nil,
82
96
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -93,6 +107,7 @@ module ContextDev
93
107
  include_images: T::Boolean,
94
108
  include_links: T::Boolean,
95
109
  max_age_ms: Integer,
110
+ parse_pdf: T::Boolean,
96
111
  shorten_base64_images: T::Boolean,
97
112
  use_main_content_only: T::Boolean,
98
113
  request_options: ContextDev::RequestOptions
@@ -108,6 +108,7 @@ module ContextDev
108
108
  max_age_ms: Integer,
109
109
  max_depth: Integer,
110
110
  max_pages: Integer,
111
+ parse_pdf: T::Boolean,
111
112
  shorten_base64_images: T::Boolean,
112
113
  url_regex: String,
113
114
  use_main_content_only: T::Boolean,
@@ -133,6 +134,10 @@ module ContextDev
133
134
  max_depth: nil,
134
135
  # Maximum number of pages to crawl. Hard cap: 500.
135
136
  max_pages: nil,
137
+ # When true (default), PDF pages are fetched and their text layer is extracted and
138
+ # converted to Markdown alongside HTML pages. When false, PDF pages are skipped
139
+ # entirely (not included in results and not counted as failures).
140
+ parse_pdf: nil,
136
141
  # Truncate base64-encoded image data in the Markdown output
137
142
  shorten_base64_images: nil,
138
143
  # Regex pattern. Only URLs matching this pattern will be followed and scraped.
@@ -149,6 +154,7 @@ module ContextDev
149
154
  params(
150
155
  url: String,
151
156
  max_age_ms: Integer,
157
+ parse_pdf: T::Boolean,
152
158
  request_options: ContextDev::RequestOptions::OrHash
153
159
  ).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
154
160
  end
@@ -159,6 +165,10 @@ module ContextDev
159
165
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
160
166
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
161
167
  max_age_ms: nil,
168
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
169
+ # returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
170
+ # and a 400 WEBSITE_ACCESS_ERROR is returned.
171
+ parse_pdf: nil,
162
172
  request_options: {}
163
173
  )
164
174
  end
@@ -186,6 +196,7 @@ module ContextDev
186
196
  include_images: T::Boolean,
187
197
  include_links: T::Boolean,
188
198
  max_age_ms: Integer,
199
+ parse_pdf: T::Boolean,
189
200
  shorten_base64_images: T::Boolean,
190
201
  use_main_content_only: T::Boolean,
191
202
  request_options: ContextDev::RequestOptions::OrHash
@@ -203,6 +214,10 @@ module ContextDev
203
214
  # younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
204
215
  # omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
205
216
  max_age_ms: nil,
217
+ # When true (default), PDF URLs are fetched and their text layer is extracted and
218
+ # converted to Markdown. When false, PDF URLs are skipped and a 400
219
+ # WEBSITE_ACCESS_ERROR is returned.
220
+ parse_pdf: nil,
206
221
  # Shorten base64-encoded image data in the Markdown output
207
222
  shorten_base64_images: nil,
208
223
  # Extract only the main content of the page, excluding headers, footers, sidebars,
@@ -47,6 +47,7 @@ module ContextDev
47
47
  features: ::Array[String],
48
48
  images: ::Array[String],
49
49
  name: String,
50
+ sku: String?,
50
51
  tags: ::Array[String],
51
52
  target_audience: ::Array[String],
52
53
  billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
@@ -67,6 +68,8 @@ module ContextDev
67
68
 
68
69
  attr_accessor name: String
69
70
 
71
+ attr_accessor sku: String?
72
+
70
73
  attr_accessor tags: ::Array[String]
71
74
 
72
75
  attr_accessor target_audience: ::Array[String]
@@ -90,6 +93,7 @@ module ContextDev
90
93
  features: ::Array[String],
91
94
  images: ::Array[String],
92
95
  name: String,
96
+ sku: String?,
93
97
  tags: ::Array[String],
94
98
  target_audience: ::Array[String],
95
99
  ?billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
@@ -106,6 +110,7 @@ module ContextDev
106
110
  features: ::Array[String],
107
111
  images: ::Array[String],
108
112
  name: String,
113
+ sku: String?,
109
114
  tags: ::Array[String],
110
115
  target_audience: ::Array[String],
111
116
  billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
@@ -26,6 +26,7 @@ module ContextDev
26
26
  features: ::Array[String],
27
27
  images: ::Array[String],
28
28
  name: String,
29
+ sku: String?,
29
30
  tags: ::Array[String],
30
31
  target_audience: ::Array[String],
31
32
  billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
@@ -46,6 +47,8 @@ module ContextDev
46
47
 
47
48
  attr_accessor name: String
48
49
 
50
+ attr_accessor sku: String?
51
+
49
52
  attr_accessor tags: ::Array[String]
50
53
 
51
54
  attr_accessor target_audience: ::Array[String]
@@ -69,6 +72,7 @@ module ContextDev
69
72
  features: ::Array[String],
70
73
  images: ::Array[String],
71
74
  name: String,
75
+ sku: String?,
72
76
  tags: ::Array[String],
73
77
  target_audience: ::Array[String],
74
78
  ?billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
@@ -85,6 +89,7 @@ module ContextDev
85
89
  features: ::Array[String],
86
90
  images: ::Array[String],
87
91
  name: String,
92
+ sku: String?,
88
93
  tags: ::Array[String],
89
94
  target_audience: ::Array[String],
90
95
  billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
@@ -9,6 +9,7 @@ module ContextDev
9
9
  max_age_ms: Integer,
10
10
  max_depth: Integer,
11
11
  max_pages: Integer,
12
+ parse_pdf: bool,
12
13
  :shorten_base64_images => bool,
13
14
  url_regex: String,
14
15
  use_main_content_only: bool
@@ -45,6 +46,10 @@ module ContextDev
45
46
 
46
47
  def max_pages=: (Integer) -> Integer
47
48
 
49
+ attr_reader parse_pdf: bool?
50
+
51
+ def parse_pdf=: (bool) -> bool
52
+
48
53
  attr_reader shorten_base64_images: bool?
49
54
 
50
55
  def shorten_base64_images=: (bool) -> bool
@@ -65,6 +70,7 @@ module ContextDev
65
70
  ?max_age_ms: Integer,
66
71
  ?max_depth: Integer,
67
72
  ?max_pages: Integer,
73
+ ?parse_pdf: bool,
68
74
  ?shorten_base64_images: bool,
69
75
  ?url_regex: String,
70
76
  ?use_main_content_only: bool,
@@ -79,6 +85,7 @@ module ContextDev
79
85
  max_age_ms: Integer,
80
86
  max_depth: Integer,
81
87
  max_pages: Integer,
88
+ parse_pdf: bool,
82
89
  :shorten_base64_images => bool,
83
90
  url_regex: String,
84
91
  use_main_content_only: bool,
@@ -25,6 +25,7 @@ module ContextDev
25
25
  {
26
26
  max_crawl_depth: Integer,
27
27
  num_failed: Integer,
28
+ num_skipped: Integer,
28
29
  num_succeeded: Integer,
29
30
  num_urls: Integer
30
31
  }
@@ -34,6 +35,8 @@ module ContextDev
34
35
 
35
36
  attr_accessor num_failed: Integer
36
37
 
38
+ attr_accessor num_skipped: Integer
39
+
37
40
  attr_accessor num_succeeded: Integer
38
41
 
39
42
  attr_accessor num_urls: Integer
@@ -41,6 +44,7 @@ module ContextDev
41
44
  def initialize: (
42
45
  max_crawl_depth: Integer,
43
46
  num_failed: Integer,
47
+ num_skipped: Integer,
44
48
  num_succeeded: Integer,
45
49
  num_urls: Integer
46
50
  ) -> void
@@ -48,6 +52,7 @@ module ContextDev
48
52
  def to_hash: -> {
49
53
  max_crawl_depth: Integer,
50
54
  num_failed: Integer,
55
+ num_skipped: Integer,
51
56
  num_succeeded: Integer,
52
57
  num_urls: Integer
53
58
  }
@@ -1,7 +1,7 @@
1
1
  module ContextDev
2
2
  module Models
3
3
  type web_web_scrape_html_params =
4
- { url: String, max_age_ms: Integer }
4
+ { url: String, max_age_ms: Integer, parse_pdf: bool }
5
5
  & ContextDev::Internal::Type::request_parameters
6
6
 
7
7
  class WebWebScrapeHTMLParams < ContextDev::Internal::Type::BaseModel
@@ -14,15 +14,21 @@ module ContextDev
14
14
 
15
15
  def max_age_ms=: (Integer) -> Integer
16
16
 
17
+ attr_reader parse_pdf: bool?
18
+
19
+ def parse_pdf=: (bool) -> bool
20
+
17
21
  def initialize: (
18
22
  url: String,
19
23
  ?max_age_ms: Integer,
24
+ ?parse_pdf: bool,
20
25
  ?request_options: ContextDev::request_opts
21
26
  ) -> void
22
27
 
23
28
  def to_hash: -> {
24
29
  url: String,
25
30
  max_age_ms: Integer,
31
+ parse_pdf: bool,
26
32
  request_options: ContextDev::RequestOptions
27
33
  }
28
34
  end
@@ -6,6 +6,7 @@ module ContextDev
6
6
  include_images: bool,
7
7
  include_links: bool,
8
8
  max_age_ms: Integer,
9
+ parse_pdf: bool,
9
10
  :shorten_base64_images => bool,
10
11
  use_main_content_only: bool
11
12
  }
@@ -29,6 +30,10 @@ module ContextDev
29
30
 
30
31
  def max_age_ms=: (Integer) -> Integer
31
32
 
33
+ attr_reader parse_pdf: bool?
34
+
35
+ def parse_pdf=: (bool) -> bool
36
+
32
37
  attr_reader shorten_base64_images: bool?
33
38
 
34
39
  def shorten_base64_images=: (bool) -> bool
@@ -42,6 +47,7 @@ module ContextDev
42
47
  ?include_images: bool,
43
48
  ?include_links: bool,
44
49
  ?max_age_ms: Integer,
50
+ ?parse_pdf: bool,
45
51
  ?shorten_base64_images: bool,
46
52
  ?use_main_content_only: bool,
47
53
  ?request_options: ContextDev::request_opts
@@ -52,6 +58,7 @@ module ContextDev
52
58
  include_images: bool,
53
59
  include_links: bool,
54
60
  max_age_ms: Integer,
61
+ parse_pdf: bool,
55
62
  :shorten_base64_images => bool,
56
63
  use_main_content_only: bool,
57
64
  request_options: ContextDev::RequestOptions
@@ -32,6 +32,7 @@ module ContextDev
32
32
  ?max_age_ms: Integer,
33
33
  ?max_depth: Integer,
34
34
  ?max_pages: Integer,
35
+ ?parse_pdf: bool,
35
36
  ?shorten_base64_images: bool,
36
37
  ?url_regex: String,
37
38
  ?use_main_content_only: bool,
@@ -41,6 +42,7 @@ module ContextDev
41
42
  def web_scrape_html: (
42
43
  url: String,
43
44
  ?max_age_ms: Integer,
45
+ ?parse_pdf: bool,
44
46
  ?request_options: ContextDev::request_opts
45
47
  ) -> ContextDev::Models::WebWebScrapeHTMLResponse
46
48
 
@@ -54,6 +56,7 @@ module ContextDev
54
56
  ?include_images: bool,
55
57
  ?include_links: bool,
56
58
  ?max_age_ms: Integer,
59
+ ?parse_pdf: bool,
57
60
  ?shorten_base64_images: bool,
58
61
  ?use_main_content_only: bool,
59
62
  ?request_options: ContextDev::request_opts
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: context.dev
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Context Dev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-23 00:00:00.000000000 Z
11
+ date: 2026-04-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cgi