context.dev 1.16.0 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/context_dev/models/ai_extract_product_params.rb +4 -3
- data/lib/context_dev/models/ai_extract_products_params.rb +8 -6
- data/lib/context_dev/models/web_screenshot_params.rb +18 -21
- data/lib/context_dev/models/web_web_crawl_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_html_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_images_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_sitemap_params.rb +11 -1
- data/lib/context_dev/resources/ai.rb +1 -1
- data/lib/context_dev/resources/web.rb +42 -12
- data/lib/context_dev/version.rb +1 -1
- data/rbi/context_dev/models/ai_extract_product_params.rbi +6 -4
- data/rbi/context_dev/models/ai_extract_products_params.rbi +12 -8
- data/rbi/context_dev/models/web_screenshot_params.rbi +28 -53
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_html_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_images_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_sitemap_params.rbi +15 -0
- data/rbi/context_dev/resources/ai.rbi +3 -2
- data/rbi/context_dev/resources/web.rbi +51 -5
- data/sig/context_dev/models/web_screenshot_params.rbs +13 -19
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_html_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_images_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_sitemap_params.rbs +12 -1
- data/sig/context_dev/resources/web.rbs +11 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e47becb29d02dd213aee4b6b49429685c9d74c35f37d49c026b8bbabc87be191
|
|
4
|
+
data.tar.gz: 2e76f0c796bf46d9ec0a1f764de47841eabf2cb290c71b279a9740b574393d98
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 859d0da8ad2c2e9fd7d0bdb4b783b9e8c75f4323d3a599b42d28ad495342ff1187f6a502449b1a325090f3ccab3f76c1d855e25c5aaaee1e7e98d690e9ac44e4
|
|
7
|
+
data.tar.gz: 5b30272a7da7dbfeb75475e067f86bb404313ee842aa86b973817ccf359dc07c53fe06890b2e2dc34b689da07ec3b12a2b77a1f871eeaa0c549196f1afbc873d
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.17.0 (2026-05-09)
|
|
4
|
+
|
|
5
|
+
Full Changelog: [v1.16.0...v1.17.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.16.0...v1.17.0)
|
|
6
|
+
|
|
7
|
+
### Features
|
|
8
|
+
|
|
9
|
+
* **api:** api update ([ae638b0](https://github.com/context-dot-dev/context-ruby-sdk/commit/ae638b04d0e08bf93e2cdd1211236d841d5313b1))
|
|
10
|
+
* **api:** api update ([8376344](https://github.com/context-dot-dev/context-ruby-sdk/commit/8376344a7a72a7ff01906bc3d6a5913a4de60bcb))
|
|
11
|
+
|
|
3
12
|
## 1.16.0 (2026-05-07)
|
|
4
13
|
|
|
5
14
|
Full Changelog: [v1.15.0...v1.16.0](https://github.com/context-dot-dev/context-ruby-sdk/compare/v1.15.0...v1.16.0)
|
data/README.md
CHANGED
|
@@ -22,8 +22,9 @@ module ContextDev
|
|
|
22
22
|
optional :max_age_ms, Integer, api_name: :maxAgeMs
|
|
23
23
|
|
|
24
24
|
# @!attribute timeout_ms
|
|
25
|
-
# Optional timeout in milliseconds for the request.
|
|
26
|
-
#
|
|
25
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
26
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
27
|
+
# value is 300000ms (5 minutes).
|
|
27
28
|
#
|
|
28
29
|
# @return [Integer, nil]
|
|
29
30
|
optional :timeout_ms, Integer, api_name: :timeoutMS
|
|
@@ -36,7 +37,7 @@ module ContextDev
|
|
|
36
37
|
#
|
|
37
38
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
38
39
|
#
|
|
39
|
-
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request.
|
|
40
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
40
41
|
#
|
|
41
42
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
42
43
|
end
|
|
@@ -45,8 +45,9 @@ module ContextDev
|
|
|
45
45
|
optional :max_products, Integer, api_name: :maxProducts
|
|
46
46
|
|
|
47
47
|
# @!attribute timeout_ms
|
|
48
|
-
# Optional timeout in milliseconds for the request.
|
|
49
|
-
#
|
|
48
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
49
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
50
|
+
# value is 300000ms (5 minutes).
|
|
50
51
|
#
|
|
51
52
|
# @return [Integer, nil]
|
|
52
53
|
optional :timeout_ms, Integer, api_name: :timeoutMS
|
|
@@ -61,7 +62,7 @@ module ContextDev
|
|
|
61
62
|
#
|
|
62
63
|
# @param max_products [Integer] Maximum number of products to extract.
|
|
63
64
|
#
|
|
64
|
-
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request.
|
|
65
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
65
66
|
end
|
|
66
67
|
|
|
67
68
|
class ByDirectURL < ContextDev::Internal::Type::BaseModel
|
|
@@ -87,8 +88,9 @@ module ContextDev
|
|
|
87
88
|
optional :max_products, Integer, api_name: :maxProducts
|
|
88
89
|
|
|
89
90
|
# @!attribute timeout_ms
|
|
90
|
-
# Optional timeout in milliseconds for the request.
|
|
91
|
-
#
|
|
91
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
92
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
93
|
+
# value is 300000ms (5 minutes).
|
|
92
94
|
#
|
|
93
95
|
# @return [Integer, nil]
|
|
94
96
|
optional :timeout_ms, Integer, api_name: :timeoutMS
|
|
@@ -104,7 +106,7 @@ module ContextDev
|
|
|
104
106
|
#
|
|
105
107
|
# @param max_products [Integer] Maximum number of products to extract.
|
|
106
108
|
#
|
|
107
|
-
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request.
|
|
109
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
108
110
|
end
|
|
109
111
|
|
|
110
112
|
# @!method self.variants
|
|
@@ -49,13 +49,13 @@ module ContextDev
|
|
|
49
49
|
# @return [Symbol, ContextDev::Models::WebScreenshotParams::Page, nil]
|
|
50
50
|
optional :page, enum: -> { ContextDev::WebScreenshotParams::Page }
|
|
51
51
|
|
|
52
|
-
# @!attribute
|
|
53
|
-
# Optional
|
|
54
|
-
#
|
|
55
|
-
#
|
|
52
|
+
# @!attribute timeout_ms
|
|
53
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
54
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
55
|
+
# value is 300000ms (5 minutes).
|
|
56
56
|
#
|
|
57
|
-
# @return [
|
|
58
|
-
optional :
|
|
57
|
+
# @return [Integer, nil]
|
|
58
|
+
optional :timeout_ms, Integer
|
|
59
59
|
|
|
60
60
|
# @!attribute viewport
|
|
61
61
|
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
@@ -63,7 +63,15 @@ module ContextDev
|
|
|
63
63
|
# @return [ContextDev::Models::WebScreenshotParams::Viewport, nil]
|
|
64
64
|
optional :viewport, -> { ContextDev::WebScreenshotParams::Viewport }
|
|
65
65
|
|
|
66
|
-
# @!
|
|
66
|
+
# @!attribute wait_for_ms
|
|
67
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
68
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
69
|
+
# omitted.
|
|
70
|
+
#
|
|
71
|
+
# @return [Integer, nil]
|
|
72
|
+
optional :wait_for_ms, Integer
|
|
73
|
+
|
|
74
|
+
# @!method initialize(direct_url: nil, domain: nil, full_screenshot: nil, max_age_ms: nil, page: nil, timeout_ms: nil, viewport: nil, wait_for_ms: nil, request_options: {})
|
|
67
75
|
# Some parameter documentations has been truncated, see
|
|
68
76
|
# {ContextDev::Models::WebScreenshotParams} for more details.
|
|
69
77
|
#
|
|
@@ -77,10 +85,12 @@ module ContextDev
|
|
|
77
85
|
#
|
|
78
86
|
# @param page [Symbol, ContextDev::Models::WebScreenshotParams::Page] Optional parameter to specify which page type to screenshot. If provided, the sy
|
|
79
87
|
#
|
|
80
|
-
# @param
|
|
88
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
81
89
|
#
|
|
82
90
|
# @param viewport [ContextDev::Models::WebScreenshotParams::Viewport] Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
83
91
|
#
|
|
92
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before taking
|
|
93
|
+
#
|
|
84
94
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
85
95
|
|
|
86
96
|
# Optional parameter to determine screenshot type. If 'true', takes a full page
|
|
@@ -117,19 +127,6 @@ module ContextDev
|
|
|
117
127
|
# @return [Array<Symbol>]
|
|
118
128
|
end
|
|
119
129
|
|
|
120
|
-
# Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
|
|
121
|
-
# faster capture with basic quality. If 'quality', optimizes for higher quality
|
|
122
|
-
# with longer wait times. Defaults to 'quality' if not provided.
|
|
123
|
-
module Prioritize
|
|
124
|
-
extend ContextDev::Internal::Type::Enum
|
|
125
|
-
|
|
126
|
-
SPEED = :speed
|
|
127
|
-
QUALITY = :quality
|
|
128
|
-
|
|
129
|
-
# @!method self.values
|
|
130
|
-
# @return [Array<Symbol>]
|
|
131
|
-
end
|
|
132
|
-
|
|
133
130
|
class Viewport < ContextDev::Internal::Type::BaseModel
|
|
134
131
|
# @!attribute height
|
|
135
132
|
# Viewport height in pixels.
|
|
@@ -74,6 +74,14 @@ module ContextDev
|
|
|
74
74
|
# @return [Boolean, nil]
|
|
75
75
|
optional :shorten_base64_images, ContextDev::Internal::Type::Boolean, api_name: :shortenBase64Images
|
|
76
76
|
|
|
77
|
+
# @!attribute timeout_ms
|
|
78
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
79
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
80
|
+
# value is 300000ms (5 minutes).
|
|
81
|
+
#
|
|
82
|
+
# @return [Integer, nil]
|
|
83
|
+
optional :timeout_ms, Integer, api_name: :timeoutMS
|
|
84
|
+
|
|
77
85
|
# @!attribute url_regex
|
|
78
86
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
79
87
|
#
|
|
@@ -87,7 +95,14 @@ module ContextDev
|
|
|
87
95
|
# @return [Boolean, nil]
|
|
88
96
|
optional :use_main_content_only, ContextDev::Internal::Type::Boolean, api_name: :useMainContentOnly
|
|
89
97
|
|
|
90
|
-
# @!
|
|
98
|
+
# @!attribute wait_for_ms
|
|
99
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
100
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
101
|
+
#
|
|
102
|
+
# @return [Integer, nil]
|
|
103
|
+
optional :wait_for_ms, Integer, api_name: :waitForMs
|
|
104
|
+
|
|
105
|
+
# @!method initialize(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
|
|
91
106
|
# Some parameter documentations has been truncated, see
|
|
92
107
|
# {ContextDev::Models::WebWebCrawlMdParams} for more details.
|
|
93
108
|
#
|
|
@@ -111,10 +126,14 @@ module ContextDev
|
|
|
111
126
|
#
|
|
112
127
|
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
113
128
|
#
|
|
129
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
130
|
+
#
|
|
114
131
|
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
115
132
|
#
|
|
116
133
|
# @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
|
|
117
134
|
#
|
|
135
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load for each craw
|
|
136
|
+
#
|
|
118
137
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
119
138
|
end
|
|
120
139
|
end
|
|
@@ -35,7 +35,22 @@ module ContextDev
|
|
|
35
35
|
# @return [Boolean, nil]
|
|
36
36
|
optional :parse_pdf, ContextDev::Internal::Type::Boolean
|
|
37
37
|
|
|
38
|
-
# @!
|
|
38
|
+
# @!attribute timeout_ms
|
|
39
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
40
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
41
|
+
# value is 300000ms (5 minutes).
|
|
42
|
+
#
|
|
43
|
+
# @return [Integer, nil]
|
|
44
|
+
optional :timeout_ms, Integer
|
|
45
|
+
|
|
46
|
+
# @!attribute wait_for_ms
|
|
47
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
48
|
+
# 30000 (30 seconds).
|
|
49
|
+
#
|
|
50
|
+
# @return [Integer, nil]
|
|
51
|
+
optional :wait_for_ms, Integer
|
|
52
|
+
|
|
53
|
+
# @!method initialize(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
|
|
39
54
|
# Some parameter documentations has been truncated, see
|
|
40
55
|
# {ContextDev::Models::WebWebScrapeHTMLParams} for more details.
|
|
41
56
|
#
|
|
@@ -47,6 +62,10 @@ module ContextDev
|
|
|
47
62
|
#
|
|
48
63
|
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
49
64
|
#
|
|
65
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
66
|
+
#
|
|
67
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
68
|
+
#
|
|
50
69
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
51
70
|
end
|
|
52
71
|
end
|
|
@@ -27,7 +27,22 @@ module ContextDev
|
|
|
27
27
|
# @return [Integer, nil]
|
|
28
28
|
optional :max_age_ms, Integer
|
|
29
29
|
|
|
30
|
-
# @!
|
|
30
|
+
# @!attribute timeout_ms
|
|
31
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
32
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
33
|
+
# value is 300000ms (5 minutes).
|
|
34
|
+
#
|
|
35
|
+
# @return [Integer, nil]
|
|
36
|
+
optional :timeout_ms, Integer
|
|
37
|
+
|
|
38
|
+
# @!attribute wait_for_ms
|
|
39
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
40
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
41
|
+
#
|
|
42
|
+
# @return [Integer, nil]
|
|
43
|
+
optional :wait_for_ms, Integer
|
|
44
|
+
|
|
45
|
+
# @!method initialize(url:, enrichment: nil, max_age_ms: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
|
|
31
46
|
# Some parameter documentations has been truncated, see
|
|
32
47
|
# {ContextDev::Models::WebWebScrapeImagesParams} for more details.
|
|
33
48
|
#
|
|
@@ -37,6 +52,10 @@ module ContextDev
|
|
|
37
52
|
#
|
|
38
53
|
# @param max_age_ms [Integer] Reuse a cached result this many milliseconds old or newer. Default: 86400000 (1
|
|
39
54
|
#
|
|
55
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
56
|
+
#
|
|
57
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before collec
|
|
58
|
+
#
|
|
40
59
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
41
60
|
|
|
42
61
|
class Enrichment < ContextDev::Internal::Type::BaseModel
|
|
@@ -54,6 +54,14 @@ module ContextDev
|
|
|
54
54
|
# @return [Boolean, nil]
|
|
55
55
|
optional :shorten_base64_images, ContextDev::Internal::Type::Boolean
|
|
56
56
|
|
|
57
|
+
# @!attribute timeout_ms
|
|
58
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
59
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
60
|
+
# value is 300000ms (5 minutes).
|
|
61
|
+
#
|
|
62
|
+
# @return [Integer, nil]
|
|
63
|
+
optional :timeout_ms, Integer
|
|
64
|
+
|
|
57
65
|
# @!attribute use_main_content_only
|
|
58
66
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
59
67
|
# and navigation
|
|
@@ -61,7 +69,14 @@ module ContextDev
|
|
|
61
69
|
# @return [Boolean, nil]
|
|
62
70
|
optional :use_main_content_only, ContextDev::Internal::Type::Boolean
|
|
63
71
|
|
|
64
|
-
# @!
|
|
72
|
+
# @!attribute wait_for_ms
|
|
73
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
74
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
75
|
+
#
|
|
76
|
+
# @return [Integer, nil]
|
|
77
|
+
optional :wait_for_ms, Integer
|
|
78
|
+
|
|
79
|
+
# @!method initialize(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
|
|
65
80
|
# Some parameter documentations has been truncated, see
|
|
66
81
|
# {ContextDev::Models::WebWebScrapeMdParams} for more details.
|
|
67
82
|
#
|
|
@@ -79,8 +94,12 @@ module ContextDev
|
|
|
79
94
|
#
|
|
80
95
|
# @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
|
|
81
96
|
#
|
|
97
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
98
|
+
#
|
|
82
99
|
# @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
83
100
|
#
|
|
101
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before conver
|
|
102
|
+
#
|
|
84
103
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
85
104
|
end
|
|
86
105
|
end
|
|
@@ -20,6 +20,14 @@ module ContextDev
|
|
|
20
20
|
# @return [Integer, nil]
|
|
21
21
|
optional :max_links, Integer
|
|
22
22
|
|
|
23
|
+
# @!attribute timeout_ms
|
|
24
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
25
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
26
|
+
# value is 300000ms (5 minutes).
|
|
27
|
+
#
|
|
28
|
+
# @return [Integer, nil]
|
|
29
|
+
optional :timeout_ms, Integer
|
|
30
|
+
|
|
23
31
|
# @!attribute url_regex
|
|
24
32
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
25
33
|
# returned and counted against maxLinks.
|
|
@@ -27,7 +35,7 @@ module ContextDev
|
|
|
27
35
|
# @return [String, nil]
|
|
28
36
|
optional :url_regex, String
|
|
29
37
|
|
|
30
|
-
# @!method initialize(domain:, max_links: nil, url_regex: nil, request_options: {})
|
|
38
|
+
# @!method initialize(domain:, max_links: nil, timeout_ms: nil, url_regex: nil, request_options: {})
|
|
31
39
|
# Some parameter documentations has been truncated, see
|
|
32
40
|
# {ContextDev::Models::WebWebScrapeSitemapParams} for more details.
|
|
33
41
|
#
|
|
@@ -35,6 +43,8 @@ module ContextDev
|
|
|
35
43
|
#
|
|
36
44
|
# @param max_links [Integer] Maximum number of links to return from the sitemap crawl. Defaults to 10,000. Mi
|
|
37
45
|
#
|
|
46
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
47
|
+
#
|
|
38
48
|
# @param url_regex [String] Optional RE2-compatible regex pattern. Only URLs matching this pattern are retur
|
|
39
49
|
#
|
|
40
50
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}]
|
|
@@ -48,7 +48,7 @@ module ContextDev
|
|
|
48
48
|
#
|
|
49
49
|
# @param max_age_ms [Integer] Return a cached result if a prior scrape for the same parameters exists and is y
|
|
50
50
|
#
|
|
51
|
-
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request.
|
|
51
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
52
52
|
#
|
|
53
53
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
54
54
|
#
|
|
@@ -70,7 +70,7 @@ module ContextDev
|
|
|
70
70
|
#
|
|
71
71
|
# Capture a screenshot of a website.
|
|
72
72
|
#
|
|
73
|
-
# @overload screenshot(direct_url: nil, domain: nil, full_screenshot: nil, max_age_ms: nil, page: nil,
|
|
73
|
+
# @overload screenshot(direct_url: nil, domain: nil, full_screenshot: nil, max_age_ms: nil, page: nil, timeout_ms: nil, viewport: nil, wait_for_ms: nil, request_options: {})
|
|
74
74
|
#
|
|
75
75
|
# @param direct_url [String] A specific URL to screenshot directly, bypassing domain resolution (e.g., 'https
|
|
76
76
|
#
|
|
@@ -82,10 +82,12 @@ module ContextDev
|
|
|
82
82
|
#
|
|
83
83
|
# @param page [Symbol, ContextDev::Models::WebScreenshotParams::Page] Optional parameter to specify which page type to screenshot. If provided, the sy
|
|
84
84
|
#
|
|
85
|
-
# @param
|
|
85
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
86
86
|
#
|
|
87
87
|
# @param viewport [ContextDev::Models::WebScreenshotParams::Viewport] Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
88
88
|
#
|
|
89
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before taking
|
|
90
|
+
#
|
|
89
91
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
90
92
|
#
|
|
91
93
|
# @return [ContextDev::Models::WebScreenshotResponse]
|
|
@@ -100,7 +102,9 @@ module ContextDev
|
|
|
100
102
|
query: query.transform_keys(
|
|
101
103
|
direct_url: "directUrl",
|
|
102
104
|
full_screenshot: "fullScreenshot",
|
|
103
|
-
max_age_ms: "maxAgeMs"
|
|
105
|
+
max_age_ms: "maxAgeMs",
|
|
106
|
+
timeout_ms: "timeoutMS",
|
|
107
|
+
wait_for_ms: "waitForMs"
|
|
104
108
|
),
|
|
105
109
|
model: ContextDev::Models::WebScreenshotResponse,
|
|
106
110
|
options: options
|
|
@@ -113,7 +117,7 @@ module ContextDev
|
|
|
113
117
|
# Performs a crawl starting from a given URL, extracts page content as Markdown,
|
|
114
118
|
# and returns results for all crawled pages.
|
|
115
119
|
#
|
|
116
|
-
# @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, url_regex: nil, use_main_content_only: nil, request_options: {})
|
|
120
|
+
# @overload web_crawl_md(url:, follow_subdomains: nil, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, max_depth: nil, max_pages: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, url_regex: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
|
|
117
121
|
#
|
|
118
122
|
# @param url [String] The starting URL for the crawl (must include http:// or https:// protocol)
|
|
119
123
|
#
|
|
@@ -135,10 +139,14 @@ module ContextDev
|
|
|
135
139
|
#
|
|
136
140
|
# @param shorten_base64_images [Boolean] Truncate base64-encoded image data in the Markdown output
|
|
137
141
|
#
|
|
142
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
143
|
+
#
|
|
138
144
|
# @param url_regex [String] Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
139
145
|
#
|
|
140
146
|
# @param use_main_content_only [Boolean] Extract only the main content, stripping headers, footers, sidebars, and navigat
|
|
141
147
|
#
|
|
148
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load for each craw
|
|
149
|
+
#
|
|
142
150
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
143
151
|
#
|
|
144
152
|
# @return [ContextDev::Models::WebWebCrawlMdResponse]
|
|
@@ -160,7 +168,7 @@ module ContextDev
|
|
|
160
168
|
#
|
|
161
169
|
# Scrapes the given URL and returns the raw HTML content of the page.
|
|
162
170
|
#
|
|
163
|
-
# @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, request_options: {})
|
|
171
|
+
# @overload web_scrape_html(url:, include_frames: nil, max_age_ms: nil, parse_pdf: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
|
|
164
172
|
#
|
|
165
173
|
# @param url [String] Full URL to scrape (must include http:// or https:// protocol)
|
|
166
174
|
#
|
|
@@ -170,6 +178,10 @@ module ContextDev
|
|
|
170
178
|
#
|
|
171
179
|
# @param parse_pdf [Boolean] When true (default), PDF URLs are fetched and their text layer is extracted and
|
|
172
180
|
#
|
|
181
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
182
|
+
#
|
|
183
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
184
|
+
#
|
|
173
185
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
174
186
|
#
|
|
175
187
|
# @return [ContextDev::Models::WebWebScrapeHTMLResponse]
|
|
@@ -184,7 +196,9 @@ module ContextDev
|
|
|
184
196
|
query: query.transform_keys(
|
|
185
197
|
include_frames: "includeFrames",
|
|
186
198
|
max_age_ms: "maxAgeMs",
|
|
187
|
-
parse_pdf: "parsePDF"
|
|
199
|
+
parse_pdf: "parsePDF",
|
|
200
|
+
timeout_ms: "timeoutMS",
|
|
201
|
+
wait_for_ms: "waitForMs"
|
|
188
202
|
),
|
|
189
203
|
model: ContextDev::Models::WebWebScrapeHTMLResponse,
|
|
190
204
|
options: options
|
|
@@ -199,7 +213,7 @@ module ContextDev
|
|
|
199
213
|
# embeds. The base request costs 1 credit; enrichment costs 1 credit per returned
|
|
200
214
|
# image.
|
|
201
215
|
#
|
|
202
|
-
# @overload web_scrape_images(url:, enrichment: nil, max_age_ms: nil, request_options: {})
|
|
216
|
+
# @overload web_scrape_images(url:, enrichment: nil, max_age_ms: nil, timeout_ms: nil, wait_for_ms: nil, request_options: {})
|
|
203
217
|
#
|
|
204
218
|
# @param url [String] Page URL to inspect. Must include http:// or https://.
|
|
205
219
|
#
|
|
@@ -207,6 +221,10 @@ module ContextDev
|
|
|
207
221
|
#
|
|
208
222
|
# @param max_age_ms [Integer] Reuse a cached result this many milliseconds old or newer. Default: 86400000 (1
|
|
209
223
|
#
|
|
224
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
225
|
+
#
|
|
226
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before collec
|
|
227
|
+
#
|
|
210
228
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
211
229
|
#
|
|
212
230
|
# @return [ContextDev::Models::WebWebScrapeImagesResponse]
|
|
@@ -218,7 +236,11 @@ module ContextDev
|
|
|
218
236
|
@client.request(
|
|
219
237
|
method: :get,
|
|
220
238
|
path: "web/scrape/images",
|
|
221
|
-
query: query.transform_keys(
|
|
239
|
+
query: query.transform_keys(
|
|
240
|
+
max_age_ms: "maxAgeMs",
|
|
241
|
+
timeout_ms: "timeoutMS",
|
|
242
|
+
wait_for_ms: "waitForMs"
|
|
243
|
+
),
|
|
222
244
|
model: ContextDev::Models::WebWebScrapeImagesResponse,
|
|
223
245
|
options: options
|
|
224
246
|
)
|
|
@@ -229,7 +251,7 @@ module ContextDev
|
|
|
229
251
|
#
|
|
230
252
|
# Scrapes the given URL into LLM usable Markdown.
|
|
231
253
|
#
|
|
232
|
-
# @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, use_main_content_only: nil, request_options: {})
|
|
254
|
+
# @overload web_scrape_md(url:, include_frames: nil, include_images: nil, include_links: nil, max_age_ms: nil, parse_pdf: nil, shorten_base64_images: nil, timeout_ms: nil, use_main_content_only: nil, wait_for_ms: nil, request_options: {})
|
|
233
255
|
#
|
|
234
256
|
# @param url [String] Full URL to scrape into LLM usable Markdown (must include http:// or https:// pr
|
|
235
257
|
#
|
|
@@ -245,8 +267,12 @@ module ContextDev
|
|
|
245
267
|
#
|
|
246
268
|
# @param shorten_base64_images [Boolean] Shorten base64-encoded image data in the Markdown output
|
|
247
269
|
#
|
|
270
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
271
|
+
#
|
|
248
272
|
# @param use_main_content_only [Boolean] Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
249
273
|
#
|
|
274
|
+
# @param wait_for_ms [Integer] Optional browser wait time in milliseconds after initial page load before conver
|
|
275
|
+
#
|
|
250
276
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
251
277
|
#
|
|
252
278
|
# @return [ContextDev::Models::WebWebScrapeMdResponse]
|
|
@@ -265,7 +291,9 @@ module ContextDev
|
|
|
265
291
|
max_age_ms: "maxAgeMs",
|
|
266
292
|
parse_pdf: "parsePDF",
|
|
267
293
|
shorten_base64_images: "shortenBase64Images",
|
|
268
|
-
|
|
294
|
+
timeout_ms: "timeoutMS",
|
|
295
|
+
use_main_content_only: "useMainContentOnly",
|
|
296
|
+
wait_for_ms: "waitForMs"
|
|
269
297
|
),
|
|
270
298
|
model: ContextDev::Models::WebWebScrapeMdResponse,
|
|
271
299
|
options: options
|
|
@@ -277,12 +305,14 @@ module ContextDev
|
|
|
277
305
|
#
|
|
278
306
|
# Crawl an entire website's sitemap and return all discovered page URLs.
|
|
279
307
|
#
|
|
280
|
-
# @overload web_scrape_sitemap(domain:, max_links: nil, url_regex: nil, request_options: {})
|
|
308
|
+
# @overload web_scrape_sitemap(domain:, max_links: nil, timeout_ms: nil, url_regex: nil, request_options: {})
|
|
281
309
|
#
|
|
282
310
|
# @param domain [String] Domain to build a sitemap for
|
|
283
311
|
#
|
|
284
312
|
# @param max_links [Integer] Maximum number of links to return from the sitemap crawl. Defaults to 10,000. Mi
|
|
285
313
|
#
|
|
314
|
+
# @param timeout_ms [Integer] Optional timeout in milliseconds for the request. If the request takes longer th
|
|
315
|
+
#
|
|
286
316
|
# @param url_regex [String] Optional RE2-compatible regex pattern. Only URLs matching this pattern are retur
|
|
287
317
|
#
|
|
288
318
|
# @param request_options [ContextDev::RequestOptions, Hash{Symbol=>Object}, nil]
|
|
@@ -296,7 +326,7 @@ module ContextDev
|
|
|
296
326
|
@client.request(
|
|
297
327
|
method: :get,
|
|
298
328
|
path: "web/scrape/sitemap",
|
|
299
|
-
query: query.transform_keys(max_links: "maxLinks", url_regex: "urlRegex"),
|
|
329
|
+
query: query.transform_keys(max_links: "maxLinks", timeout_ms: "timeoutMS", url_regex: "urlRegex"),
|
|
300
330
|
model: ContextDev::Models::WebWebScrapeSitemapResponse,
|
|
301
331
|
options: options
|
|
302
332
|
)
|
data/lib/context_dev/version.rb
CHANGED
|
@@ -27,8 +27,9 @@ module ContextDev
|
|
|
27
27
|
sig { params(max_age_ms: Integer).void }
|
|
28
28
|
attr_writer :max_age_ms
|
|
29
29
|
|
|
30
|
-
# Optional timeout in milliseconds for the request.
|
|
31
|
-
#
|
|
30
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
31
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
32
|
+
# value is 300000ms (5 minutes).
|
|
32
33
|
sig { returns(T.nilable(Integer)) }
|
|
33
34
|
attr_reader :timeout_ms
|
|
34
35
|
|
|
@@ -50,8 +51,9 @@ module ContextDev
|
|
|
50
51
|
# younger than this many milliseconds. Defaults to 7 days (604800000 ms) when
|
|
51
52
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
52
53
|
max_age_ms: nil,
|
|
53
|
-
# Optional timeout in milliseconds for the request.
|
|
54
|
-
#
|
|
54
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
55
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
56
|
+
# value is 300000ms (5 minutes).
|
|
55
57
|
timeout_ms: nil,
|
|
56
58
|
request_options: {}
|
|
57
59
|
)
|
|
@@ -92,8 +92,9 @@ module ContextDev
|
|
|
92
92
|
sig { params(max_products: Integer).void }
|
|
93
93
|
attr_writer :max_products
|
|
94
94
|
|
|
95
|
-
# Optional timeout in milliseconds for the request.
|
|
96
|
-
#
|
|
95
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
96
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
97
|
+
# value is 300000ms (5 minutes).
|
|
97
98
|
sig { returns(T.nilable(Integer)) }
|
|
98
99
|
attr_reader :timeout_ms
|
|
99
100
|
|
|
@@ -117,8 +118,9 @@ module ContextDev
|
|
|
117
118
|
max_age_ms: nil,
|
|
118
119
|
# Maximum number of products to extract.
|
|
119
120
|
max_products: nil,
|
|
120
|
-
# Optional timeout in milliseconds for the request.
|
|
121
|
-
#
|
|
121
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
122
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
123
|
+
# value is 300000ms (5 minutes).
|
|
122
124
|
timeout_ms: nil
|
|
123
125
|
)
|
|
124
126
|
end
|
|
@@ -167,8 +169,9 @@ module ContextDev
|
|
|
167
169
|
sig { params(max_products: Integer).void }
|
|
168
170
|
attr_writer :max_products
|
|
169
171
|
|
|
170
|
-
# Optional timeout in milliseconds for the request.
|
|
171
|
-
#
|
|
172
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
173
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
174
|
+
# value is 300000ms (5 minutes).
|
|
172
175
|
sig { returns(T.nilable(Integer)) }
|
|
173
176
|
attr_reader :timeout_ms
|
|
174
177
|
|
|
@@ -193,8 +196,9 @@ module ContextDev
|
|
|
193
196
|
max_age_ms: nil,
|
|
194
197
|
# Maximum number of products to extract.
|
|
195
198
|
max_products: nil,
|
|
196
|
-
# Optional timeout in milliseconds for the request.
|
|
197
|
-
#
|
|
199
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
200
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
201
|
+
# value is 300000ms (5 minutes).
|
|
198
202
|
timeout_ms: nil
|
|
199
203
|
)
|
|
200
204
|
end
|