context.dev 1.6.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +1 -1
- data/lib/context_dev/models/ai_extract_product_response.rb +9 -1
- data/lib/context_dev/models/ai_extract_products_response.rb +9 -1
- data/lib/context_dev/models/web_web_crawl_md_params.rb +11 -1
- data/lib/context_dev/models/web_web_crawl_md_response.rb +12 -1
- data/lib/context_dev/models/web_web_scrape_html_params.rb +11 -1
- data/lib/context_dev/models/web_web_scrape_md_params.rb +11 -1
- data/lib/context_dev/resources/web.rb +11 -4
- data/lib/context_dev/version.rb +1 -1
- data/rbi/context_dev/models/ai_extract_product_response.rbi +8 -0
- data/rbi/context_dev/models/ai_extract_products_response.rbi +8 -0
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +15 -0
- data/rbi/context_dev/models/web_web_crawl_md_response.rbi +8 -0
- data/rbi/context_dev/models/web_web_scrape_html_params.rbi +15 -0
- data/rbi/context_dev/models/web_web_scrape_md_params.rbi +15 -0
- data/rbi/context_dev/resources/web.rbi +15 -0
- data/sig/context_dev/models/ai_extract_product_response.rbs +5 -0
- data/sig/context_dev/models/ai_extract_products_response.rbs +5 -0
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +7 -0
- data/sig/context_dev/models/web_web_crawl_md_response.rbs +5 -0
- data/sig/context_dev/models/web_web_scrape_html_params.rbs +7 -1
- data/sig/context_dev/models/web_web_scrape_md_params.rbs +7 -0
- data/sig/context_dev/resources/web.rbs +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 968dddadfbf1199743ee57bc82a47fd2d7b882da416170b13f41bf0574828d5d
|
|
4
|
+
data.tar.gz: 5e35c5601c635fdc8182a81e5738418f4e4d879231cbaea89b1722ba5f28db00
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9ffe4d5dde7376e41afd9607af63fcecd7ab97d8abf26a2adceef1a30594ec54dd10356c731c54dd97ebad44e00eb2d24132ebe27c25a95acfd8a568578557a9
|
|
7
|
+
data.tar.gz: 2fc80f2bc42e75a0dc03f4c3692f45e6eadce167280695690d69bc408c87eb9504fb557cd661c33041bff9c59876a6f47bbd8ba360b092ab762e70221b79e3ff
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.8.0 (2026-04-24)
|
|
4
|
+
|
|
5
|
+
Full Changelog: [v1.7.0...v1.8.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.7.0...v1.8.0)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
* **api:** api update ([84bfef2](https://github.com/context-dot-dev/context-ruby-sdk/commit/84bfef260d99a7a444c243edc4be1c72191929dd))
|
|
10
|
+
* **api:** api update ([5405b96](https://github.com/context-dot-dev/context-ruby-sdk/commit/5405b9676800e2004d92dd9f621dfc56972bacd2))
|
|
11
|
+
|
|
12
|
+
## 1.7.0 (2026-04-24)
|
|
13
|
+
|
|
14
|
+
Full Changelog: [v1.6.0...v1.7.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.6.0...v1.7.0)
|
|
15
|
+
|
|
16
|
+
### Features
|
|
17
|
+
|
|
18
|
+
* **api:** api update ([9c8b4d9](https://github.com/context-dot-dev/context-ruby-sdk/commit/9c8b4d9b1d813c5b7992998a3fa23cde63fe2f4c))
|
|
19
|
+
|
|
3
20
|
## 1.6.0 (2026-04-23)
|
|
4
21
|
|
|
5
22
|
Full Changelog: [v1.5.0...v1.6.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.5.0...v1.6.0)
|
data/README.md
CHANGED
|
@@ -70,6 +70,12 @@ module ContextDev
|
|
|
70
70
|
# @return [String]
|
|
71
71
|
required :name, String
|
|
72
72
|
|
|
73
|
+
# @!attribute sku
|
|
74
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
75
|
+
#
|
|
76
|
+
# @return [String, nil]
|
|
77
|
+
required :sku, String, nil?: true
|
|
78
|
+
|
|
73
79
|
# @!attribute tags
|
|
74
80
|
# Tags associated with the product
|
|
75
81
|
#
|
|
@@ -128,7 +134,7 @@ module ContextDev
|
|
|
128
134
|
# @return [String, nil]
|
|
129
135
|
optional :url, String, nil?: true
|
|
130
136
|
|
|
131
|
-
# @!method initialize(description:, features:, images:, name:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
|
|
137
|
+
# @!method initialize(description:, features:, images:, name:, sku:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
|
|
132
138
|
# The extracted product data, or null if not a product page
|
|
133
139
|
#
|
|
134
140
|
# @param description [String] Description of the product
|
|
@@ -139,6 +145,8 @@ module ContextDev
|
|
|
139
145
|
#
|
|
140
146
|
# @param name [String] Name of the product
|
|
141
147
|
#
|
|
148
|
+
# @param sku [String, nil] Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
149
|
+
#
|
|
142
150
|
# @param tags [Array<String>] Tags associated with the product
|
|
143
151
|
#
|
|
144
152
|
# @param target_audience [Array<String>] Target audience for the product (array of strings)
|
|
@@ -39,6 +39,12 @@ module ContextDev
|
|
|
39
39
|
# @return [String]
|
|
40
40
|
required :name, String
|
|
41
41
|
|
|
42
|
+
# @!attribute sku
|
|
43
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
44
|
+
#
|
|
45
|
+
# @return [String, nil]
|
|
46
|
+
required :sku, String, nil?: true
|
|
47
|
+
|
|
42
48
|
# @!attribute tags
|
|
43
49
|
# Tags associated with the product
|
|
44
50
|
#
|
|
@@ -97,7 +103,7 @@ module ContextDev
|
|
|
97
103
|
# @return [String, nil]
|
|
98
104
|
optional :url, String, nil?: true
|
|
99
105
|
|
|
100
|
-
# @!method initialize(description:, features:, images:, name:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
|
|
106
|
+
# @!method initialize(description:, features:, images:, name:, sku:, tags:, target_audience:, billing_frequency: nil, category: nil, currency: nil, image_url: nil, price: nil, pricing_model: nil, url: nil)
|
|
101
107
|
# @param description [String] Description of the product
|
|
102
108
|
#
|
|
103
109
|
# @param features [Array<String>] List of product features
|
|
@@ -106,6 +112,8 @@ module ContextDev
|
|
|
106
112
|
#
|
|
107
113
|
# @param name [String] Name of the product
|
|
108
114
|
#
|
|
115
|
+
# @param sku [String, nil] Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
116
|
+
#
|
|
109
117
|
# @param tags [Array<String>] Tags associated with the product
|
|
110
118
|
#
|
|
111
119
|
# @param target_audience [Array<String>] Target audience for the product (array of strings)
|
|
@@ -53,6 +53,14 @@ module ContextDev
|
|
|
53
53
|
# @return [Integer, nil]
|
|
54
54
|
optional :max_pages, Integer, api_name: :maxPages
|
|
55
55
|
|
|
56
|
+
# @!attribute parse_pdf
|
|
57
|
+
# When true (default), PDF pages are fetched and their text layer is extracted and
|
|
58
|
+
# converted to Markdown alongside HTML pages. When false, PDF pages are skipped
|
|
59
|
+
# entirely (not included in results and not counted as failures).
|
|
60
|
+
#
|
|
61
|
+
# @return [Boolean, nil]
|
|
62
|
+
optional :parse_pdf, ContextDev::Internal::Type::Boolean, api_name: :parsePDF
|
|
63
|
+
|
|
56
64
|
# @!attribute shorten_base64_images
|
|
57
65
|
# Truncate base64-encoded image data in the Markdown output
|
|
58
66
|
#
|
|
@@ -72,7 +80,7 @@ module ContextDev
|
|
|
72
80
|
# @return [Boolean, nil]
|
|
73
81
|
optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
|
|
74
82
|
|
|
75
|
-
# @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
83
|
+
# @!method initialize(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
76
84
|
# Some parameter documentations has been truncated, see
|
|
77
85
|
# {ContextDev::Models::WebWebCrawlMdParams} for more details.
|
|
78
86
|
#
|
|
@@ -90,6 +98,8 @@ module ContextDev
|
|
|
90
98
|
#
|
|
91
99
|
# @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
|
|
92
100
|
#
|
|
101
|
+
# @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
|
|
102
|
+
#
|
|
93
103
|
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
94
104
|
#
|
|
95
105
|
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
@@ -33,6 +33,12 @@ module ContextDev
|
|
|
33
33
|
# @return [Integer]
|
|
34
34
|
required :num_failed, Integer, api_name: :numFailed
|
|
35
35
|
|
|
36
|
+
# @!attribute num_skipped
|
|
37
|
+
# Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
|
|
38
|
+
#
|
|
39
|
+
# @return [Integer]
|
|
40
|
+
required :num_skipped, Integer, api_name: :numSkipped
|
|
41
|
+
|
|
36
42
|
# @!attribute num_succeeded
|
|
37
43
|
# Number of pages successfully crawled
|
|
38
44
|
#
|
|
@@ -45,11 +51,16 @@ module ContextDev
|
|
|
45
51
|
# @return [Integer]
|
|
46
52
|
required :num_urls, Integer, api_name: :numUrls
|
|
47
53
|
|
|
48
|
-
# @!method initialize(max_crawl_depth:, num_failed:, num_succeeded:, num_urls:)
|
|
54
|
+
# @!method initialize(max_crawl_depth:, num_failed:, num_skipped:, num_succeeded:, num_urls:)
|
|
55
|
+
# Some parameter documentations has been truncated, see
|
|
56
|
+
# {ContextDev::Models::WebWebCrawlMdResponse::Metadata} for more details.
|
|
57
|
+
#
|
|
49
58
|
# @param max_crawl_depth [Integer] Maximum crawl depth reached during the crawl
|
|
50
59
|
#
|
|
51
60
|
# @param num_failed [Integer] Number of pages that failed to crawl
|
|
52
61
|
#
|
|
62
|
+
# @param num_skipped [Integer] Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
|
|
63
|
+
#
|
|
53
64
|
# @param num_succeeded [Integer] Number of pages successfully crawled
|
|
54
65
|
#
|
|
55
66
|
# @param num_urls [Integer] Total number of URLs crawled
|
|
@@ -21,7 +21,15 @@ module ContextDev
|
|
|
21
21
|
# @return [Integer, nil]
|
|
22
22
|
optional :max_age_ms, Integer
|
|
23
23
|
|
|
24
|
-
# @!method initialize(url:, max_age_ms: nil, request_options: {})
|
|
24
|
+
# @!attribute parse_pdf
|
|
25
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
26
|
+
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
27
|
+
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
28
|
+
#
|
|
29
|
+
# @return [Boolean, nil]
|
|
30
|
+
optional :parse_pdf, ContextDev::Internal::Type::Boolean
|
|
31
|
+
|
|
32
|
+
# @!method initialize(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
|
|
25
33
|
# Some parameter documentations has been truncated, see
|
|
26
34
|
# {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
|
|
27
35
|
#
|
|
@@ -29,6 +37,8 @@ module ContextDev
|
|
|
29
37
|
#
|
|
30
38
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
31
39
|
#
|
|
40
|
+
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
41
|
+
#
|
|
32
42
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
33
43
|
end
|
|
34
44
|
end
|
|
@@ -34,6 +34,14 @@ module ContextDev
|
|
|
34
34
|
# @return [Integer, nil]
|
|
35
35
|
optional :max_age_ms, Integer
|
|
36
36
|
|
|
37
|
+
# @!attribute parse_pdf
|
|
38
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
39
|
+
# converted to Markdown. When false, PDF URLs are skipped and a 400
|
|
40
|
+
# WEBSITE_ACCESS_ERROR is returned.
|
|
41
|
+
#
|
|
42
|
+
# @return [Boolean, nil]
|
|
43
|
+
optional :parse_pdf, ContextDev::Internal::Type::Boolean
|
|
44
|
+
|
|
37
45
|
# @!attribute shorten_base64_images
|
|
38
46
|
# Shorten base64-encoded image data in the Markdown output
|
|
39
47
|
#
|
|
@@ -47,7 +55,7 @@ module ContextDev
|
|
|
47
55
|
# @return [Boolean, nil]
|
|
48
56
|
optional :use_main_content_only, ContextDev::Internal::Type::Boolean
|
|
49
57
|
|
|
50
|
-
# @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
|
|
58
|
+
# @!method initialize(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
|
|
51
59
|
# Some parameter documentations has been truncated, see
|
|
52
60
|
# {ContextDev::Models::WebWebScrapeMdParams} for more details.
|
|
53
61
|
#
|
|
@@ -59,6 +67,8 @@ module ContextDev
|
|
|
59
67
|
#
|
|
60
68
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
61
69
|
#
|
|
70
|
+
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
71
|
+
#
|
|
62
72
|
# @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
|
|
63
73
|
#
|
|
64
74
|
# @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -105,7 +105,7 @@ module ContextDev
|
|
|
105
105
|
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
106
106
|
# and returns results for all crawled pages.
|
|
107
107
|
#
|
|
108
|
-
# @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
108
|
+
# @overload web_crawl_md(url:, follow_subdomains: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
109
109
|
#
|
|
110
110
|
# @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
|
|
111
111
|
#
|
|
@@ -121,6 +121,8 @@ module ContextDev
|
|
|
121
121
|
#
|
|
122
122
|
# @param max_pages [Integer] Maximum number of pages to crawl. Hard cap: 500.
|
|
123
123
|
#
|
|
124
|
+
# @param parse_pdf [Boolean] When true (default), PDF pages are fetched and their text layer is extracted and
|
|
125
|
+
#
|
|
124
126
|
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
125
127
|
#
|
|
126
128
|
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
@@ -148,12 +150,14 @@ module ContextDev
|
|
|
148
150
|
#
|
|
149
151
|
# Scrapes the given URL and returns the raw HTML content of the page.
|
|
150
152
|
#
|
|
151
|
-
# @overload web_scrape_html(url:, max_age_ms: nil, request_options: {})
|
|
153
|
+
# @overload web_scrape_html(url:, max_age_ms: nil, parse_pdf: nil, request_options: {})
|
|
152
154
|
#
|
|
153
155
|
# @param url [String] Full URL to scrape (must include http:// or https:// protocol)
|
|
154
156
|
#
|
|
155
157
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
156
158
|
#
|
|
159
|
+
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
160
|
+
#
|
|
157
161
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
158
162
|
#
|
|
159
163
|
# @return [ContextDev::Models::WebWebScrapeHTMLResponse]
|
|
@@ -165,7 +169,7 @@ module ContextDev
|
|
|
165
169
|
@client.request(
|
|
166
170
|
method: :get,
|
|
167
171
|
path: "web/scrape/html",
|
|
168
|
-
query: query.transform_keys(max_age_ms: "maxAgeMs"),
|
|
172
|
+
query: query.transform_keys(max_age_ms: "maxAgeMs", parse_pdf: "parsePDF"),
|
|
169
173
|
model: ContextDev::Models::WebWebScrapeHTMLResponse,
|
|
170
174
|
options: options
|
|
171
175
|
)
|
|
@@ -201,7 +205,7 @@ module ContextDev
|
|
|
201
205
|
#
|
|
202
206
|
# Scrapes the given URL into LLM usable Markdown.
|
|
203
207
|
#
|
|
204
|
-
# @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
|
|
208
|
+
# @overload web_scrape_md(url:, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
|
|
205
209
|
#
|
|
206
210
|
# @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
|
|
207
211
|
#
|
|
@@ -211,6 +215,8 @@ module ContextDev
|
|
|
211
215
|
#
|
|
212
216
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
213
217
|
#
|
|
218
|
+
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
219
|
+
#
|
|
214
220
|
# @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
|
|
215
221
|
#
|
|
216
222
|
# @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -230,6 +236,7 @@ module ContextDev
|
|
|
230
236
|
include_images: "includeImages",
|
|
231
237
|
include_links: "includeLinks",
|
|
232
238
|
max_age_ms: "maxAgeMs",
|
|
239
|
+
parse_pdf: "parsePDF",
|
|
233
240
|
shorten_base64_images: "shortenBase64Images",
|
|
234
241
|
use_main_content_only: "useMainContentOnly"
|
|
235
242
|
),
|
data/lib/context_dev/version.rb
CHANGED
|
@@ -155,6 +155,10 @@ module ContextDev
|
|
|
155
155
|
sig { returns(String) }
|
|
156
156
|
attr_accessor :name
|
|
157
157
|
|
|
158
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
159
|
+
sig { returns(T.nilable(String)) }
|
|
160
|
+
attr_accessor :sku
|
|
161
|
+
|
|
158
162
|
# Tags associated with the product
|
|
159
163
|
sig { returns(T::Array[String]) }
|
|
160
164
|
attr_accessor :tags
|
|
@@ -210,6 +214,7 @@ module ContextDev
|
|
|
210
214
|
features: T::Array[String],
|
|
211
215
|
images: T::Array[String],
|
|
212
216
|
name: String,
|
|
217
|
+
sku: T.nilable(String),
|
|
213
218
|
tags: T::Array[String],
|
|
214
219
|
target_audience: T::Array[String],
|
|
215
220
|
billing_frequency:
|
|
@@ -236,6 +241,8 @@ module ContextDev
|
|
|
236
241
|
images:,
|
|
237
242
|
# Name of the product
|
|
238
243
|
name:,
|
|
244
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
245
|
+
sku:,
|
|
239
246
|
# Tags associated with the product
|
|
240
247
|
tags:,
|
|
241
248
|
# Target audience for the product (array of strings)
|
|
@@ -264,6 +271,7 @@ module ContextDev
|
|
|
264
271
|
features: T::Array[String],
|
|
265
272
|
images: T::Array[String],
|
|
266
273
|
name: String,
|
|
274
|
+
sku: T.nilable(String),
|
|
267
275
|
tags: T::Array[String],
|
|
268
276
|
target_audience: T::Array[String],
|
|
269
277
|
billing_frequency:
|
|
@@ -81,6 +81,10 @@ module ContextDev
|
|
|
81
81
|
sig { returns(String) }
|
|
82
82
|
attr_accessor :name
|
|
83
83
|
|
|
84
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
85
|
+
sig { returns(T.nilable(String)) }
|
|
86
|
+
attr_accessor :sku
|
|
87
|
+
|
|
84
88
|
# Tags associated with the product
|
|
85
89
|
sig { returns(T::Array[String]) }
|
|
86
90
|
attr_accessor :tags
|
|
@@ -135,6 +139,7 @@ module ContextDev
|
|
|
135
139
|
features: T::Array[String],
|
|
136
140
|
images: T::Array[String],
|
|
137
141
|
name: String,
|
|
142
|
+
sku: T.nilable(String),
|
|
138
143
|
tags: T::Array[String],
|
|
139
144
|
target_audience: T::Array[String],
|
|
140
145
|
billing_frequency:
|
|
@@ -161,6 +166,8 @@ module ContextDev
|
|
|
161
166
|
images:,
|
|
162
167
|
# Name of the product
|
|
163
168
|
name:,
|
|
169
|
+
# Stock Keeping Unit (product identifier). Null if no identifier is found.
|
|
170
|
+
sku:,
|
|
164
171
|
# Tags associated with the product
|
|
165
172
|
tags:,
|
|
166
173
|
# Target audience for the product (array of strings)
|
|
@@ -189,6 +196,7 @@ module ContextDev
|
|
|
189
196
|
features: T::Array[String],
|
|
190
197
|
images: T::Array[String],
|
|
191
198
|
name: String,
|
|
199
|
+
sku: T.nilable(String),
|
|
192
200
|
tags: T::Array[String],
|
|
193
201
|
target_audience: T::Array[String],
|
|
194
202
|
billing_frequency:
|
|
@@ -61,6 +61,15 @@ module ContextDev
|
|
|
61
61
|
sig { params(max_pages: Integer).void }
|
|
62
62
|
attr_writer :max_pages
|
|
63
63
|
|
|
64
|
+
# When true (default), PDF pages are fetched and their text layer is extracted and
|
|
65
|
+
# converted to Markdown alongside HTML pages. When false, PDF pages are skipped
|
|
66
|
+
# entirely (not included in results and not counted as failures).
|
|
67
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
68
|
+
attr_reader :parse_pdf
|
|
69
|
+
|
|
70
|
+
sig { params(parse_pdf: T::Boolean).void }
|
|
71
|
+
attr_writer :parse_pdf
|
|
72
|
+
|
|
64
73
|
# Truncate base64-encoded image data in the Markdown output
|
|
65
74
|
sig { returns(T.nilable(T::Boolean)) }
|
|
66
75
|
attr_reader :shorten_base64_images
|
|
@@ -92,6 +101,7 @@ module ContextDev
|
|
|
92
101
|
max_age_ms: Integer,
|
|
93
102
|
max_depth: Integer,
|
|
94
103
|
max_pages: Integer,
|
|
104
|
+
parse_pdf: T::Boolean,
|
|
95
105
|
shorten_base64_images: T::Boolean,
|
|
96
106
|
url_regex: String,
|
|
97
107
|
use_main_content_only: T::Boolean,
|
|
@@ -117,6 +127,10 @@ module ContextDev
|
|
|
117
127
|
max_depth: nil,
|
|
118
128
|
# Maximum number of pages to crawl. Hard cap: 500.
|
|
119
129
|
max_pages: nil,
|
|
130
|
+
# When true (default), PDF pages are fetched and their text layer is extracted and
|
|
131
|
+
# converted to Markdown alongside HTML pages. When false, PDF pages are skipped
|
|
132
|
+
# entirely (not included in results and not counted as failures).
|
|
133
|
+
parse_pdf: nil,
|
|
120
134
|
# Truncate base64-encoded image data in the Markdown output
|
|
121
135
|
shorten_base64_images: nil,
|
|
122
136
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
@@ -138,6 +152,7 @@ module ContextDev
|
|
|
138
152
|
max_age_ms: Integer,
|
|
139
153
|
max_depth: Integer,
|
|
140
154
|
max_pages: Integer,
|
|
155
|
+
parse_pdf: T::Boolean,
|
|
141
156
|
shorten_base64_images: T::Boolean,
|
|
142
157
|
url_regex: String,
|
|
143
158
|
use_main_content_only: T::Boolean,
|
|
@@ -64,6 +64,10 @@ module ContextDev
|
|
|
64
64
|
sig { returns(Integer) }
|
|
65
65
|
attr_accessor :num_failed
|
|
66
66
|
|
|
67
|
+
# Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
|
|
68
|
+
sig { returns(Integer) }
|
|
69
|
+
attr_accessor :num_skipped
|
|
70
|
+
|
|
67
71
|
# Number of pages successfully crawled
|
|
68
72
|
sig { returns(Integer) }
|
|
69
73
|
attr_accessor :num_succeeded
|
|
@@ -76,6 +80,7 @@ module ContextDev
|
|
|
76
80
|
params(
|
|
77
81
|
max_crawl_depth: Integer,
|
|
78
82
|
num_failed: Integer,
|
|
83
|
+
num_skipped: Integer,
|
|
79
84
|
num_succeeded: Integer,
|
|
80
85
|
num_urls: Integer
|
|
81
86
|
).returns(T.attached_class)
|
|
@@ -85,6 +90,8 @@ module ContextDev
|
|
|
85
90
|
max_crawl_depth:,
|
|
86
91
|
# Number of pages that failed to crawl
|
|
87
92
|
num_failed:,
|
|
93
|
+
# Number of URLs skipped (PDFs when parsePDF=false, or URLs not matching urlRegex)
|
|
94
|
+
num_skipped:,
|
|
88
95
|
# Number of pages successfully crawled
|
|
89
96
|
num_succeeded:,
|
|
90
97
|
# Total number of URLs crawled
|
|
@@ -97,6 +104,7 @@ module ContextDev
|
|
|
97
104
|
{
|
|
98
105
|
max_crawl_depth: Integer,
|
|
99
106
|
num_failed: Integer,
|
|
107
|
+
num_skipped: Integer,
|
|
100
108
|
num_succeeded: Integer,
|
|
101
109
|
num_urls: Integer
|
|
102
110
|
}
|
|
@@ -27,10 +27,20 @@ module ContextDev
|
|
|
27
27
|
sig { params(max_age_ms: Integer).void }
|
|
28
28
|
attr_writer :max_age_ms
|
|
29
29
|
|
|
30
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
31
|
+
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
32
|
+
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
33
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
34
|
+
attr_reader :parse_pdf
|
|
35
|
+
|
|
36
|
+
sig { params(parse_pdf: T::Boolean).void }
|
|
37
|
+
attr_writer :parse_pdf
|
|
38
|
+
|
|
30
39
|
sig do
|
|
31
40
|
params(
|
|
32
41
|
url: String,
|
|
33
42
|
max_age_ms: Integer,
|
|
43
|
+
parse_pdf: T::Boolean,
|
|
34
44
|
request_options: ContextDev::RequestOptions::OrHash
|
|
35
45
|
).returns(T.attached_class)
|
|
36
46
|
end
|
|
@@ -41,6 +51,10 @@ module ContextDev
|
|
|
41
51
|
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
42
52
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
43
53
|
max_age_ms: nil,
|
|
54
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
55
|
+
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
56
|
+
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
57
|
+
parse_pdf: nil,
|
|
44
58
|
request_options: {}
|
|
45
59
|
)
|
|
46
60
|
end
|
|
@@ -50,6 +64,7 @@ module ContextDev
|
|
|
50
64
|
{
|
|
51
65
|
url: String,
|
|
52
66
|
max_age_ms: Integer,
|
|
67
|
+
parse_pdf: T::Boolean,
|
|
53
68
|
request_options: ContextDev::RequestOptions
|
|
54
69
|
}
|
|
55
70
|
)
|
|
@@ -39,6 +39,15 @@ module ContextDev
|
|
|
39
39
|
sig { params(max_age_ms: Integer).void }
|
|
40
40
|
attr_writer :max_age_ms
|
|
41
41
|
|
|
42
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
43
|
+
# converted to Markdown. When false, PDF URLs are skipped and a 400
|
|
44
|
+
# WEBSITE_ACCESS_ERROR is returned.
|
|
45
|
+
sig { returns(T.nilable(T::Boolean)) }
|
|
46
|
+
attr_reader :parse_pdf
|
|
47
|
+
|
|
48
|
+
sig { params(parse_pdf: T::Boolean).void }
|
|
49
|
+
attr_writer :parse_pdf
|
|
50
|
+
|
|
42
51
|
# Shorten base64-encoded image data in the Markdown output
|
|
43
52
|
sig { returns(T.nilable(T::Boolean)) }
|
|
44
53
|
attr_reader :shorten_base64_images
|
|
@@ -60,6 +69,7 @@ module ContextDev
|
|
|
60
69
|
include_images: T::Boolean,
|
|
61
70
|
include_links: T::Boolean,
|
|
62
71
|
max_age_ms: Integer,
|
|
72
|
+
parse_pdf: T::Boolean,
|
|
63
73
|
shorten_base64_images: T::Boolean,
|
|
64
74
|
use_main_content_only: T::Boolean,
|
|
65
75
|
request_options: ContextDev::RequestOptions::OrHash
|
|
@@ -77,6 +87,10 @@ module ContextDev
|
|
|
77
87
|
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
78
88
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
79
89
|
max_age_ms: nil,
|
|
90
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
91
|
+
# converted to Markdown. When false, PDF URLs are skipped and a 400
|
|
92
|
+
# WEBSITE_ACCESS_ERROR is returned.
|
|
93
|
+
parse_pdf: nil,
|
|
80
94
|
# Shorten base64-encoded image data in the Markdown output
|
|
81
95
|
shorten_base64_images: nil,
|
|
82
96
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -93,6 +107,7 @@ module ContextDev
|
|
|
93
107
|
include_images: T::Boolean,
|
|
94
108
|
include_links: T::Boolean,
|
|
95
109
|
max_age_ms: Integer,
|
|
110
|
+
parse_pdf: T::Boolean,
|
|
96
111
|
shorten_base64_images: T::Boolean,
|
|
97
112
|
use_main_content_only: T::Boolean,
|
|
98
113
|
request_options: ContextDev::RequestOptions
|
|
@@ -108,6 +108,7 @@ module ContextDev
|
|
|
108
108
|
max_age_ms: Integer,
|
|
109
109
|
max_depth: Integer,
|
|
110
110
|
max_pages: Integer,
|
|
111
|
+
parse_pdf: T::Boolean,
|
|
111
112
|
shorten_base64_images: T::Boolean,
|
|
112
113
|
url_regex: String,
|
|
113
114
|
use_main_content_only: T::Boolean,
|
|
@@ -133,6 +134,10 @@ module ContextDev
|
|
|
133
134
|
max_depth: nil,
|
|
134
135
|
# Maximum number of pages to crawl. Hard cap: 500.
|
|
135
136
|
max_pages: nil,
|
|
137
|
+
# When true (default), PDF pages are fetched and their text layer is extracted and
|
|
138
|
+
# converted to Markdown alongside HTML pages. When false, PDF pages are skipped
|
|
139
|
+
# entirely (not included in results and not counted as failures).
|
|
140
|
+
parse_pdf: nil,
|
|
136
141
|
# Truncate base64-encoded image data in the Markdown output
|
|
137
142
|
shorten_base64_images: nil,
|
|
138
143
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
@@ -149,6 +154,7 @@ module ContextDev
|
|
|
149
154
|
params(
|
|
150
155
|
url: String,
|
|
151
156
|
max_age_ms: Integer,
|
|
157
|
+
parse_pdf: T::Boolean,
|
|
152
158
|
request_options: ContextDev::RequestOptions::OrHash
|
|
153
159
|
).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
|
|
154
160
|
end
|
|
@@ -159,6 +165,10 @@ module ContextDev
|
|
|
159
165
|
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
160
166
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
161
167
|
max_age_ms: nil,
|
|
168
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
169
|
+
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
170
|
+
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
171
|
+
parse_pdf: nil,
|
|
162
172
|
request_options: {}
|
|
163
173
|
)
|
|
164
174
|
end
|
|
@@ -186,6 +196,7 @@ module ContextDev
|
|
|
186
196
|
include_images: T::Boolean,
|
|
187
197
|
include_links: T::Boolean,
|
|
188
198
|
max_age_ms: Integer,
|
|
199
|
+
parse_pdf: T::Boolean,
|
|
189
200
|
shorten_base64_images: T::Boolean,
|
|
190
201
|
use_main_content_only: T::Boolean,
|
|
191
202
|
request_options: ContextDev::RequestOptions::OrHash
|
|
@@ -203,6 +214,10 @@ module ContextDev
|
|
|
203
214
|
# younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
204
215
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
205
216
|
max_age_ms: nil,
|
|
217
|
+
# When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
218
|
+
# converted to Markdown. When false, PDF URLs are skipped and a 400
|
|
219
|
+
# WEBSITE_ACCESS_ERROR is returned.
|
|
220
|
+
parse_pdf: nil,
|
|
206
221
|
# Shorten base64-encoded image data in the Markdown output
|
|
207
222
|
shorten_base64_images: nil,
|
|
208
223
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
@@ -47,6 +47,7 @@ module ContextDev
|
|
|
47
47
|
features: ::Array[String],
|
|
48
48
|
images: ::Array[String],
|
|
49
49
|
name: String,
|
|
50
|
+
sku: String?,
|
|
50
51
|
tags: ::Array[String],
|
|
51
52
|
target_audience: ::Array[String],
|
|
52
53
|
billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
|
|
@@ -67,6 +68,8 @@ module ContextDev
|
|
|
67
68
|
|
|
68
69
|
attr_accessor name: String
|
|
69
70
|
|
|
71
|
+
attr_accessor sku: String?
|
|
72
|
+
|
|
70
73
|
attr_accessor tags: ::Array[String]
|
|
71
74
|
|
|
72
75
|
attr_accessor target_audience: ::Array[String]
|
|
@@ -90,6 +93,7 @@ module ContextDev
|
|
|
90
93
|
features: ::Array[String],
|
|
91
94
|
images: ::Array[String],
|
|
92
95
|
name: String,
|
|
96
|
+
sku: String?,
|
|
93
97
|
tags: ::Array[String],
|
|
94
98
|
target_audience: ::Array[String],
|
|
95
99
|
?billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
|
|
@@ -106,6 +110,7 @@ module ContextDev
|
|
|
106
110
|
features: ::Array[String],
|
|
107
111
|
images: ::Array[String],
|
|
108
112
|
name: String,
|
|
113
|
+
sku: String?,
|
|
109
114
|
tags: ::Array[String],
|
|
110
115
|
target_audience: ::Array[String],
|
|
111
116
|
billing_frequency: ContextDev::Models::AIExtractProductResponse::Product::billing_frequency?,
|
|
@@ -26,6 +26,7 @@ module ContextDev
|
|
|
26
26
|
features: ::Array[String],
|
|
27
27
|
images: ::Array[String],
|
|
28
28
|
name: String,
|
|
29
|
+
sku: String?,
|
|
29
30
|
tags: ::Array[String],
|
|
30
31
|
target_audience: ::Array[String],
|
|
31
32
|
billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
|
|
@@ -46,6 +47,8 @@ module ContextDev
|
|
|
46
47
|
|
|
47
48
|
attr_accessor name: String
|
|
48
49
|
|
|
50
|
+
attr_accessor sku: String?
|
|
51
|
+
|
|
49
52
|
attr_accessor tags: ::Array[String]
|
|
50
53
|
|
|
51
54
|
attr_accessor target_audience: ::Array[String]
|
|
@@ -69,6 +72,7 @@ module ContextDev
|
|
|
69
72
|
features: ::Array[String],
|
|
70
73
|
images: ::Array[String],
|
|
71
74
|
name: String,
|
|
75
|
+
sku: String?,
|
|
72
76
|
tags: ::Array[String],
|
|
73
77
|
target_audience: ::Array[String],
|
|
74
78
|
?billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
|
|
@@ -85,6 +89,7 @@ module ContextDev
|
|
|
85
89
|
features: ::Array[String],
|
|
86
90
|
images: ::Array[String],
|
|
87
91
|
name: String,
|
|
92
|
+
sku: String?,
|
|
88
93
|
tags: ::Array[String],
|
|
89
94
|
target_audience: ::Array[String],
|
|
90
95
|
billing_frequency: ContextDev::Models::AIExtractProductsResponse::Product::billing_frequency?,
|
|
@@ -9,6 +9,7 @@ module ContextDev
|
|
|
9
9
|
max_age_ms: Integer,
|
|
10
10
|
max_depth: Integer,
|
|
11
11
|
max_pages: Integer,
|
|
12
|
+
parse_pdf: bool,
|
|
12
13
|
:shorten_base64_images => bool,
|
|
13
14
|
url_regex: String,
|
|
14
15
|
use_main_content_only: bool
|
|
@@ -45,6 +46,10 @@ module ContextDev
|
|
|
45
46
|
|
|
46
47
|
def max_pages=: (Integer) -> Integer
|
|
47
48
|
|
|
49
|
+
attr_reader parse_pdf: bool?
|
|
50
|
+
|
|
51
|
+
def parse_pdf=: (bool) -> bool
|
|
52
|
+
|
|
48
53
|
attr_reader shorten_base64_images: bool?
|
|
49
54
|
|
|
50
55
|
def shorten_base64_images=: (bool) -> bool
|
|
@@ -65,6 +70,7 @@ module ContextDev
|
|
|
65
70
|
?max_age_ms: Integer,
|
|
66
71
|
?max_depth: Integer,
|
|
67
72
|
?max_pages: Integer,
|
|
73
|
+
?parse_pdf: bool,
|
|
68
74
|
?shorten_base64_images: bool,
|
|
69
75
|
?url_regex: String,
|
|
70
76
|
?use_main_content_only: bool,
|
|
@@ -79,6 +85,7 @@ module ContextDev
|
|
|
79
85
|
max_age_ms: Integer,
|
|
80
86
|
max_depth: Integer,
|
|
81
87
|
max_pages: Integer,
|
|
88
|
+
parse_pdf: bool,
|
|
82
89
|
:shorten_base64_images => bool,
|
|
83
90
|
url_regex: String,
|
|
84
91
|
use_main_content_only: bool,
|
|
@@ -25,6 +25,7 @@ module ContextDev
|
|
|
25
25
|
{
|
|
26
26
|
max_crawl_depth: Integer,
|
|
27
27
|
num_failed: Integer,
|
|
28
|
+
num_skipped: Integer,
|
|
28
29
|
num_succeeded: Integer,
|
|
29
30
|
num_urls: Integer
|
|
30
31
|
}
|
|
@@ -34,6 +35,8 @@ module ContextDev
|
|
|
34
35
|
|
|
35
36
|
attr_accessor num_failed: Integer
|
|
36
37
|
|
|
38
|
+
attr_accessor num_skipped: Integer
|
|
39
|
+
|
|
37
40
|
attr_accessor num_succeeded: Integer
|
|
38
41
|
|
|
39
42
|
attr_accessor num_urls: Integer
|
|
@@ -41,6 +44,7 @@ module ContextDev
|
|
|
41
44
|
def initialize: (
|
|
42
45
|
max_crawl_depth: Integer,
|
|
43
46
|
num_failed: Integer,
|
|
47
|
+
num_skipped: Integer,
|
|
44
48
|
num_succeeded: Integer,
|
|
45
49
|
num_urls: Integer
|
|
46
50
|
) -> void
|
|
@@ -48,6 +52,7 @@ module ContextDev
|
|
|
48
52
|
def to_hash: -> {
|
|
49
53
|
max_crawl_depth: Integer,
|
|
50
54
|
num_failed: Integer,
|
|
55
|
+
num_skipped: Integer,
|
|
51
56
|
num_succeeded: Integer,
|
|
52
57
|
num_urls: Integer
|
|
53
58
|
}
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
module ContextDev
|
|
2
2
|
module Models
|
|
3
3
|
type web_web_scrape_html_params =
|
|
4
|
-
{ url: String, max_age_ms: Integer }
|
|
4
|
+
{ url: String, max_age_ms: Integer, parse_pdf: bool }
|
|
5
5
|
& ContextDev::Internal::Type::request_parameters
|
|
6
6
|
|
|
7
7
|
class WebWebScrapeHTMLParams < ContextDev::Internal::Type::BaseModel
|
|
@@ -14,15 +14,21 @@ module ContextDev
|
|
|
14
14
|
|
|
15
15
|
def max_age_ms=: (Integer) -> Integer
|
|
16
16
|
|
|
17
|
+
attr_reader parse_pdf: bool?
|
|
18
|
+
|
|
19
|
+
def parse_pdf=: (bool) -> bool
|
|
20
|
+
|
|
17
21
|
def initialize: (
|
|
18
22
|
url: String,
|
|
19
23
|
?max_age_ms: Integer,
|
|
24
|
+
?parse_pdf: bool,
|
|
20
25
|
?request_options: ContextDev::request_opts
|
|
21
26
|
) -> void
|
|
22
27
|
|
|
23
28
|
def to_hash: -> {
|
|
24
29
|
url: String,
|
|
25
30
|
max_age_ms: Integer,
|
|
31
|
+
parse_pdf: bool,
|
|
26
32
|
request_options: ContextDev::RequestOptions
|
|
27
33
|
}
|
|
28
34
|
end
|
|
@@ -6,6 +6,7 @@ module ContextDev
|
|
|
6
6
|
include_images: bool,
|
|
7
7
|
include_links: bool,
|
|
8
8
|
max_age_ms: Integer,
|
|
9
|
+
parse_pdf: bool,
|
|
9
10
|
:shorten_base64_images => bool,
|
|
10
11
|
use_main_content_only: bool
|
|
11
12
|
}
|
|
@@ -29,6 +30,10 @@ module ContextDev
|
|
|
29
30
|
|
|
30
31
|
def max_age_ms=: (Integer) -> Integer
|
|
31
32
|
|
|
33
|
+
attr_reader parse_pdf: bool?
|
|
34
|
+
|
|
35
|
+
def parse_pdf=: (bool) -> bool
|
|
36
|
+
|
|
32
37
|
attr_reader shorten_base64_images: bool?
|
|
33
38
|
|
|
34
39
|
def shorten_base64_images=: (bool) -> bool
|
|
@@ -42,6 +47,7 @@ module ContextDev
|
|
|
42
47
|
?include_images: bool,
|
|
43
48
|
?include_links: bool,
|
|
44
49
|
?max_age_ms: Integer,
|
|
50
|
+
?parse_pdf: bool,
|
|
45
51
|
?shorten_base64_images: bool,
|
|
46
52
|
?use_main_content_only: bool,
|
|
47
53
|
?request_options: ContextDev::request_opts
|
|
@@ -52,6 +58,7 @@ module ContextDev
|
|
|
52
58
|
include_images: bool,
|
|
53
59
|
include_links: bool,
|
|
54
60
|
max_age_ms: Integer,
|
|
61
|
+
parse_pdf: bool,
|
|
55
62
|
:shorten_base64_images => bool,
|
|
56
63
|
use_main_content_only: bool,
|
|
57
64
|
request_options: ContextDev::RequestOptions
|
|
@@ -32,6 +32,7 @@ module ContextDev
|
|
|
32
32
|
?max_age_ms: Integer,
|
|
33
33
|
?max_depth: Integer,
|
|
34
34
|
?max_pages: Integer,
|
|
35
|
+
?parse_pdf: bool,
|
|
35
36
|
?shorten_base64_images: bool,
|
|
36
37
|
?url_regex: String,
|
|
37
38
|
?use_main_content_only: bool,
|
|
@@ -41,6 +42,7 @@ module ContextDev
|
|
|
41
42
|
def web_scrape_html: (
|
|
42
43
|
url: String,
|
|
43
44
|
?max_age_ms: Integer,
|
|
45
|
+
?parse_pdf: bool,
|
|
44
46
|
?request_options: ContextDev::request_opts
|
|
45
47
|
) -> ContextDev::Models::WebWebScrapeHTMLResponse
|
|
46
48
|
|
|
@@ -54,6 +56,7 @@ module ContextDev
|
|
|
54
56
|
?include_images: bool,
|
|
55
57
|
?include_links: bool,
|
|
56
58
|
?max_age_ms: Integer,
|
|
59
|
+
?parse_pdf: bool,
|
|
57
60
|
?shorten_base64_images: bool,
|
|
58
61
|
?use_main_content_only: bool,
|
|
59
62
|
?request_options: ContextDev::request_opts
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: context.dev
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.0
|
|
4
|
+
version: 1.8.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Context Dev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-24 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cgi
|