context.dev 1.16.0 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/lib/context_dev/models/ai_extract_product_params.rb +4 -3
- data/lib/context_dev/models/ai_extract_products_params.rb +8 -6
- data/lib/context_dev/models/web_screenshot_params.rb +18 -21
- data/lib/context_dev/models/web_web_crawl_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_html_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_images_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_sitemap_params.rb +11 -1
- data/lib/context_dev/resources/ai.rb +1 -1
- data/lib/context_dev/resources/web.rb +42 -12
- data/lib/context_dev/version.rb +1 -1
- data/rbi/context_dev/models/ai_extract_product_params.rbi +6 -4
- data/rbi/context_dev/models/ai_extract_products_params.rbi +12 -8
- data/rbi/context_dev/models/web_screenshot_params.rbi +28 -53
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_html_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_images_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_sitemap_params.rbi +15 -0
- data/rbi/context_dev/resources/ai.rbi +3 -2
- data/rbi/context_dev/resources/web.rbi +51 -5
- data/sig/context_dev/models/web_screenshot_params.rbs +13 -19
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_html_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_images_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_sitemap_params.rbs +12 -1
- data/sig/context_dev/resources/web.rbs +11 -1
- metadata +2 -2
|
@@ -69,22 +69,14 @@ module ContextDev
|
|
|
69
69
|
sig { params(page: ContextDev::WebScreenshotParams::Page::OrSymbol).void }
|
|
70
70
|
attr_writer :page
|
|
71
71
|
|
|
72
|
-
# Optional
|
|
73
|
-
#
|
|
74
|
-
#
|
|
75
|
-
sig
|
|
76
|
-
|
|
77
|
-
T.nilable(ContextDev::WebScreenshotParams::Prioritize::OrSymbol)
|
|
78
|
-
)
|
|
79
|
-
end
|
|
80
|
-
attr_reader :prioritize
|
|
72
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
73
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
74
|
+
# value is 300000ms (5 minutes).
|
|
75
|
+
sig { returns(T.nilable(Integer)) }
|
|
76
|
+
attr_reader :timeout_ms
|
|
81
77
|
|
|
82
|
-
sig
|
|
83
|
-
|
|
84
|
-
prioritize: ContextDev::WebScreenshotParams::Prioritize::OrSymbol
|
|
85
|
-
).void
|
|
86
|
-
end
|
|
87
|
-
attr_writer :prioritize
|
|
78
|
+
sig { params(timeout_ms: Integer).void }
|
|
79
|
+
attr_writer :timeout_ms
|
|
88
80
|
|
|
89
81
|
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
90
82
|
sig { returns(T.nilable(ContextDev::WebScreenshotParams::Viewport)) }
|
|
@@ -95,6 +87,15 @@ module ContextDev
|
|
|
95
87
|
end
|
|
96
88
|
attr_writer :viewport
|
|
97
89
|
|
|
90
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
91
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
92
|
+
# omitted.
|
|
93
|
+
sig { returns(T.nilable(Integer)) }
|
|
94
|
+
attr_reader :wait_for_ms
|
|
95
|
+
|
|
96
|
+
sig { params(wait_for_ms: Integer).void }
|
|
97
|
+
attr_writer :wait_for_ms
|
|
98
|
+
|
|
98
99
|
sig do
|
|
99
100
|
params(
|
|
100
101
|
direct_url: String,
|
|
@@ -103,8 +104,9 @@ module ContextDev
|
|
|
103
104
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
104
105
|
max_age_ms: Integer,
|
|
105
106
|
page: ContextDev::WebScreenshotParams::Page::OrSymbol,
|
|
106
|
-
|
|
107
|
+
timeout_ms: Integer,
|
|
107
108
|
viewport: ContextDev::WebScreenshotParams::Viewport::OrHash,
|
|
109
|
+
wait_for_ms: Integer,
|
|
108
110
|
request_options: ContextDev::RequestOptions::OrHash
|
|
109
111
|
).returns(T.attached_class)
|
|
110
112
|
end
|
|
@@ -131,12 +133,16 @@ module ContextDev
|
|
|
131
133
|
# provided, screenshots the main domain landing page. Only applicable when using
|
|
132
134
|
# 'domain', not 'directUrl'.
|
|
133
135
|
page: nil,
|
|
134
|
-
# Optional
|
|
135
|
-
#
|
|
136
|
-
#
|
|
137
|
-
|
|
136
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
137
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
138
|
+
# value is 300000ms (5 minutes).
|
|
139
|
+
timeout_ms: nil,
|
|
138
140
|
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
139
141
|
viewport: nil,
|
|
142
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
143
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
144
|
+
# omitted.
|
|
145
|
+
wait_for_ms: nil,
|
|
140
146
|
request_options: {}
|
|
141
147
|
)
|
|
142
148
|
end
|
|
@@ -150,8 +156,9 @@ module ContextDev
|
|
|
150
156
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
151
157
|
max_age_ms: Integer,
|
|
152
158
|
page: ContextDev::WebScreenshotParams::Page::OrSymbol,
|
|
153
|
-
|
|
159
|
+
timeout_ms: Integer,
|
|
154
160
|
viewport: ContextDev::WebScreenshotParams::Viewport,
|
|
161
|
+
wait_for_ms: Integer,
|
|
155
162
|
request_options: ContextDev::RequestOptions
|
|
156
163
|
}
|
|
157
164
|
)
|
|
@@ -230,38 +237,6 @@ module ContextDev
|
|
|
230
237
|
end
|
|
231
238
|
end
|
|
232
239
|
|
|
233
|
-
# Optional parameter to prioritize screenshot capture. If 'speed', optimizes for
|
|
234
|
-
# faster capture with basic quality. If 'quality', optimizes for higher quality
|
|
235
|
-
# with longer wait times. Defaults to 'quality' if not provided.
|
|
236
|
-
module Prioritize
|
|
237
|
-
extend ContextDev::Internal::Type::Enum
|
|
238
|
-
|
|
239
|
-
TaggedSymbol =
|
|
240
|
-
T.type_alias do
|
|
241
|
-
T.all(Symbol, ContextDev::WebScreenshotParams::Prioritize)
|
|
242
|
-
end
|
|
243
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
244
|
-
|
|
245
|
-
SPEED =
|
|
246
|
-
T.let(
|
|
247
|
-
:speed,
|
|
248
|
-
ContextDev::WebScreenshotParams::Prioritize::TaggedSymbol
|
|
249
|
-
)
|
|
250
|
-
QUALITY =
|
|
251
|
-
T.let(
|
|
252
|
-
:quality,
|
|
253
|
-
ContextDev::WebScreenshotParams::Prioritize::TaggedSymbol
|
|
254
|
-
)
|
|
255
|
-
|
|
256
|
-
sig do
|
|
257
|
-
override.returns(
|
|
258
|
-
T::Array[ContextDev::WebScreenshotParams::Prioritize::TaggedSymbol]
|
|
259
|
-
)
|
|
260
|
-
end
|
|
261
|
-
def self.values
|
|
262
|
-
end
|
|
263
|
-
end
|
|
264
|
-
|
|
265
240
|
class Viewport < ContextDev::Internal::Type::BaseModel
|
|
266
241
|
OrHash =
|
|
267
242
|
T.type_alias do
|
|
@@ -85,6 +85,15 @@ module ContextDev
|
|
|
85
85
|
sig { params(shorten_base64_images: T::Boolean).void }
|
|
86
86
|
attr_writer :shorten_base64_images
|
|
87
87
|
|
|
88
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
89
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
90
|
+
# value is 300000ms (5 minutes).
|
|
91
|
+
sig { returns(T.nilable(Integer)) }
|
|
92
|
+
attr_reader :timeout_ms
|
|
93
|
+
|
|
94
|
+
sig { params(timeout_ms: Integer).void }
|
|
95
|
+
attr_writer :timeout_ms
|
|
96
|
+
|
|
88
97
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
89
98
|
sig { returns(T.nilable(String)) }
|
|
90
99
|
attr_reader :url_regex
|
|
@@ -100,6 +109,14 @@ module ContextDev
|
|
|
100
109
|
sig { params(use_main_content_only: T::Boolean).void }
|
|
101
110
|
attr_writer :use_main_content_only
|
|
102
111
|
|
|
112
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
113
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
114
|
+
sig { returns(T.nilable(Integer)) }
|
|
115
|
+
attr_reader :wait_for_ms
|
|
116
|
+
|
|
117
|
+
sig { params(wait_for_ms: Integer).void }
|
|
118
|
+
attr_writer :wait_for_ms
|
|
119
|
+
|
|
103
120
|
sig do
|
|
104
121
|
params(
|
|
105
122
|
url: String,
|
|
@@ -112,8 +129,10 @@ module ContextDev
|
|
|
112
129
|
max_pages: Integer,
|
|
113
130
|
parse_pdf: T::Boolean,
|
|
114
131
|
shorten_base64_images: T::Boolean,
|
|
132
|
+
timeout_ms: Integer,
|
|
115
133
|
url_regex: String,
|
|
116
134
|
use_main_content_only: T::Boolean,
|
|
135
|
+
wait_for_ms: Integer,
|
|
117
136
|
request_options: ContextDev::RequestOptions::OrHash
|
|
118
137
|
).returns(T.attached_class)
|
|
119
138
|
end
|
|
@@ -145,11 +164,18 @@ module ContextDev
|
|
|
145
164
|
parse_pdf: nil,
|
|
146
165
|
# Truncate base64-encoded image data in the Markdown output
|
|
147
166
|
shorten_base64_images: nil,
|
|
167
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
168
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
169
|
+
# value is 300000ms (5 minutes).
|
|
170
|
+
timeout_ms: nil,
|
|
148
171
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
149
172
|
url_regex: nil,
|
|
150
173
|
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
151
174
|
# navigation
|
|
152
175
|
use_main_content_only: nil,
|
|
176
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
177
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
178
|
+
wait_for_ms: nil,
|
|
153
179
|
request_options: {}
|
|
154
180
|
)
|
|
155
181
|
end
|
|
@@ -167,8 +193,10 @@ module ContextDev
|
|
|
167
193
|
max_pages: Integer,
|
|
168
194
|
parse_pdf: T::Boolean,
|
|
169
195
|
shorten_base64_images: T::Boolean,
|
|
196
|
+
timeout_ms: Integer,
|
|
170
197
|
url_regex: String,
|
|
171
198
|
use_main_content_only: T::Boolean,
|
|
199
|
+
wait_for_ms: Integer,
|
|
172
200
|
request_options: ContextDev::RequestOptions
|
|
173
201
|
}
|
|
174
202
|
)
|
|
@@ -43,12 +43,31 @@ module ContextDev
|
|
|
43
43
|
sig { params(parse_pdf: T::Boolean).void }
|
|
44
44
|
attr_writer :parse_pdf
|
|
45
45
|
|
|
46
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
47
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
48
|
+
# value is 300000ms (5 minutes).
|
|
49
|
+
sig { returns(T.nilable(Integer)) }
|
|
50
|
+
attr_reader :timeout_ms
|
|
51
|
+
|
|
52
|
+
sig { params(timeout_ms: Integer).void }
|
|
53
|
+
attr_writer :timeout_ms
|
|
54
|
+
|
|
55
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
56
|
+
# 30000 (30 seconds).
|
|
57
|
+
sig { returns(T.nilable(Integer)) }
|
|
58
|
+
attr_reader :wait_for_ms
|
|
59
|
+
|
|
60
|
+
sig { params(wait_for_ms: Integer).void }
|
|
61
|
+
attr_writer :wait_for_ms
|
|
62
|
+
|
|
46
63
|
sig do
|
|
47
64
|
params(
|
|
48
65
|
url: String,
|
|
49
66
|
include_frames: T::Boolean,
|
|
50
67
|
max_age_ms: Integer,
|
|
51
68
|
parse_pdf: T::Boolean,
|
|
69
|
+
timeout_ms: Integer,
|
|
70
|
+
wait_for_ms: Integer,
|
|
52
71
|
request_options: ContextDev::RequestOptions::OrHash
|
|
53
72
|
).returns(T.attached_class)
|
|
54
73
|
end
|
|
@@ -65,6 +84,13 @@ module ContextDev
|
|
|
65
84
|
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
66
85
|
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
67
86
|
parse_pdf: nil,
|
|
87
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
88
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
89
|
+
# value is 300000ms (5 minutes).
|
|
90
|
+
timeout_ms: nil,
|
|
91
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
92
|
+
# 30000 (30 seconds).
|
|
93
|
+
wait_for_ms: nil,
|
|
68
94
|
request_options: {}
|
|
69
95
|
)
|
|
70
96
|
end
|
|
@@ -76,6 +102,8 @@ module ContextDev
|
|
|
76
102
|
include_frames: T::Boolean,
|
|
77
103
|
max_age_ms: Integer,
|
|
78
104
|
parse_pdf: T::Boolean,
|
|
105
|
+
timeout_ms: Integer,
|
|
106
|
+
wait_for_ms: Integer,
|
|
79
107
|
request_options: ContextDev::RequestOptions
|
|
80
108
|
}
|
|
81
109
|
)
|
|
@@ -40,11 +40,30 @@ module ContextDev
|
|
|
40
40
|
sig { params(max_age_ms: Integer).void }
|
|
41
41
|
attr_writer :max_age_ms
|
|
42
42
|
|
|
43
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
44
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
45
|
+
# value is 300000ms (5 minutes).
|
|
46
|
+
sig { returns(T.nilable(Integer)) }
|
|
47
|
+
attr_reader :timeout_ms
|
|
48
|
+
|
|
49
|
+
sig { params(timeout_ms: Integer).void }
|
|
50
|
+
attr_writer :timeout_ms
|
|
51
|
+
|
|
52
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
53
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
54
|
+
sig { returns(T.nilable(Integer)) }
|
|
55
|
+
attr_reader :wait_for_ms
|
|
56
|
+
|
|
57
|
+
sig { params(wait_for_ms: Integer).void }
|
|
58
|
+
attr_writer :wait_for_ms
|
|
59
|
+
|
|
43
60
|
sig do
|
|
44
61
|
params(
|
|
45
62
|
url: String,
|
|
46
63
|
enrichment: ContextDev::WebWebScrapeImagesParams::Enrichment::OrHash,
|
|
47
64
|
max_age_ms: Integer,
|
|
65
|
+
timeout_ms: Integer,
|
|
66
|
+
wait_for_ms: Integer,
|
|
48
67
|
request_options: ContextDev::RequestOptions::OrHash
|
|
49
68
|
).returns(T.attached_class)
|
|
50
69
|
end
|
|
@@ -57,6 +76,13 @@ module ContextDev
|
|
|
57
76
|
# Reuse a cached result this many milliseconds old or newer. Default: 86400000 (1
|
|
58
77
|
# day). Set to 0 to bypass cache. Maximum: 2592000000 (30 days).
|
|
59
78
|
max_age_ms: nil,
|
|
79
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
80
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
81
|
+
# value is 300000ms (5 minutes).
|
|
82
|
+
timeout_ms: nil,
|
|
83
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
84
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
85
|
+
wait_for_ms: nil,
|
|
60
86
|
request_options: {}
|
|
61
87
|
)
|
|
62
88
|
end
|
|
@@ -67,6 +93,8 @@ module ContextDev
|
|
|
67
93
|
url: String,
|
|
68
94
|
enrichment: ContextDev::WebWebScrapeImagesParams::Enrichment,
|
|
69
95
|
max_age_ms: Integer,
|
|
96
|
+
timeout_ms: Integer,
|
|
97
|
+
wait_for_ms: Integer,
|
|
70
98
|
request_options: ContextDev::RequestOptions
|
|
71
99
|
}
|
|
72
100
|
)
|
|
@@ -62,6 +62,15 @@ module ContextDev
|
|
|
62
62
|
sig { params(shorten_base64_images: T::Boolean).void }
|
|
63
63
|
attr_writer :shorten_base64_images
|
|
64
64
|
|
|
65
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
66
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
67
|
+
# value is 300000ms (5 minutes).
|
|
68
|
+
sig { returns(T.nilable(Integer)) }
|
|
69
|
+
attr_reader :timeout_ms
|
|
70
|
+
|
|
71
|
+
sig { params(timeout_ms: Integer).void }
|
|
72
|
+
attr_writer :timeout_ms
|
|
73
|
+
|
|
65
74
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
66
75
|
# and navigation
|
|
67
76
|
sig { returns(T.nilable(T::Boolean)) }
|
|
@@ -70,6 +79,14 @@ module ContextDev
|
|
|
70
79
|
sig { params(use_main_content_only: T::Boolean).void }
|
|
71
80
|
attr_writer :use_main_content_only
|
|
72
81
|
|
|
82
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
83
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
84
|
+
sig { returns(T.nilable(Integer)) }
|
|
85
|
+
attr_reader :wait_for_ms
|
|
86
|
+
|
|
87
|
+
sig { params(wait_for_ms: Integer).void }
|
|
88
|
+
attr_writer :wait_for_ms
|
|
89
|
+
|
|
73
90
|
sig do
|
|
74
91
|
params(
|
|
75
92
|
url: String,
|
|
@@ -79,7 +96,9 @@ module ContextDev
|
|
|
79
96
|
max_age_ms: Integer,
|
|
80
97
|
parse_pdf: T::Boolean,
|
|
81
98
|
shorten_base64_images: T::Boolean,
|
|
99
|
+
timeout_ms: Integer,
|
|
82
100
|
use_main_content_only: T::Boolean,
|
|
101
|
+
wait_for_ms: Integer,
|
|
83
102
|
request_options: ContextDev::RequestOptions::OrHash
|
|
84
103
|
).returns(T.attached_class)
|
|
85
104
|
end
|
|
@@ -103,9 +122,16 @@ module ContextDev
|
|
|
103
122
|
parse_pdf: nil,
|
|
104
123
|
# Shorten base64-encoded image data in the Markdown output
|
|
105
124
|
shorten_base64_images: nil,
|
|
125
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
126
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
127
|
+
# value is 300000ms (5 minutes).
|
|
128
|
+
timeout_ms: nil,
|
|
106
129
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
107
130
|
# and navigation
|
|
108
131
|
use_main_content_only: nil,
|
|
132
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
133
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
134
|
+
wait_for_ms: nil,
|
|
109
135
|
request_options: {}
|
|
110
136
|
)
|
|
111
137
|
end
|
|
@@ -120,7 +146,9 @@ module ContextDev
|
|
|
120
146
|
max_age_ms: Integer,
|
|
121
147
|
parse_pdf: T::Boolean,
|
|
122
148
|
shorten_base64_images: T::Boolean,
|
|
149
|
+
timeout_ms: Integer,
|
|
123
150
|
use_main_content_only: T::Boolean,
|
|
151
|
+
wait_for_ms: Integer,
|
|
124
152
|
request_options: ContextDev::RequestOptions
|
|
125
153
|
}
|
|
126
154
|
)
|
|
@@ -26,6 +26,15 @@ module ContextDev
|
|
|
26
26
|
sig { params(max_links: Integer).void }
|
|
27
27
|
attr_writer :max_links
|
|
28
28
|
|
|
29
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
30
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
31
|
+
# value is 300000ms (5 minutes).
|
|
32
|
+
sig { returns(T.nilable(Integer)) }
|
|
33
|
+
attr_reader :timeout_ms
|
|
34
|
+
|
|
35
|
+
sig { params(timeout_ms: Integer).void }
|
|
36
|
+
attr_writer :timeout_ms
|
|
37
|
+
|
|
29
38
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
30
39
|
# returned and counted against maxLinks.
|
|
31
40
|
sig { returns(T.nilable(String)) }
|
|
@@ -38,6 +47,7 @@ module ContextDev
|
|
|
38
47
|
params(
|
|
39
48
|
domain: String,
|
|
40
49
|
max_links: Integer,
|
|
50
|
+
timeout_ms: Integer,
|
|
41
51
|
url_regex: String,
|
|
42
52
|
request_options: ContextDev::RequestOptions::OrHash
|
|
43
53
|
).returns(T.attached_class)
|
|
@@ -48,6 +58,10 @@ module ContextDev
|
|
|
48
58
|
# Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
|
|
49
59
|
# Minimum is 1, maximum is 100,000.
|
|
50
60
|
max_links: nil,
|
|
61
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
62
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
63
|
+
# value is 300000ms (5 minutes).
|
|
64
|
+
timeout_ms: nil,
|
|
51
65
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
52
66
|
# returned and counted against maxLinks.
|
|
53
67
|
url_regex: nil,
|
|
@@ -60,6 +74,7 @@ module ContextDev
|
|
|
60
74
|
{
|
|
61
75
|
domain: String,
|
|
62
76
|
max_links: Integer,
|
|
77
|
+
timeout_ms: Integer,
|
|
63
78
|
url_regex: String,
|
|
64
79
|
request_options: ContextDev::RequestOptions
|
|
65
80
|
}
|
|
@@ -48,8 +48,9 @@ module ContextDev
|
|
|
48
48
|
# younger than this many milliseconds. Defaults to 7 days (604800000 ms) when
|
|
49
49
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
50
50
|
max_age_ms: nil,
|
|
51
|
-
# Optional timeout in milliseconds for the request.
|
|
52
|
-
#
|
|
51
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
52
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
53
|
+
# value is 300000ms (5 minutes).
|
|
53
54
|
timeout_ms: nil,
|
|
54
55
|
request_options: {}
|
|
55
56
|
)
|
|
@@ -67,8 +67,9 @@ module ContextDev
|
|
|
67
67
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
68
68
|
max_age_ms: Integer,
|
|
69
69
|
page: ContextDev::WebScreenshotParams::Page::OrSymbol,
|
|
70
|
-
|
|
70
|
+
timeout_ms: Integer,
|
|
71
71
|
viewport: ContextDev::WebScreenshotParams::Viewport::OrHash,
|
|
72
|
+
wait_for_ms: Integer,
|
|
72
73
|
request_options: ContextDev::RequestOptions::OrHash
|
|
73
74
|
).returns(ContextDev::Models::WebScreenshotResponse)
|
|
74
75
|
end
|
|
@@ -95,12 +96,16 @@ module ContextDev
|
|
|
95
96
|
# provided, screenshots the main domain landing page. Only applicable when using
|
|
96
97
|
# 'domain', not 'directUrl'.
|
|
97
98
|
page: nil,
|
|
98
|
-
# Optional
|
|
99
|
-
#
|
|
100
|
-
#
|
|
101
|
-
|
|
99
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
100
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
101
|
+
# value is 300000ms (5 minutes).
|
|
102
|
+
timeout_ms: nil,
|
|
102
103
|
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
103
104
|
viewport: nil,
|
|
105
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
106
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
107
|
+
# omitted.
|
|
108
|
+
wait_for_ms: nil,
|
|
104
109
|
request_options: {}
|
|
105
110
|
)
|
|
106
111
|
end
|
|
@@ -119,8 +124,10 @@ module ContextDev
|
|
|
119
124
|
max_pages: Integer,
|
|
120
125
|
parse_pdf: T::Boolean,
|
|
121
126
|
shorten_base64_images: T::Boolean,
|
|
127
|
+
timeout_ms: Integer,
|
|
122
128
|
url_regex: String,
|
|
123
129
|
use_main_content_only: T::Boolean,
|
|
130
|
+
wait_for_ms: Integer,
|
|
124
131
|
request_options: ContextDev::RequestOptions::OrHash
|
|
125
132
|
).returns(ContextDev::Models::WebWebCrawlMdResponse)
|
|
126
133
|
end
|
|
@@ -152,11 +159,18 @@ module ContextDev
|
|
|
152
159
|
parse_pdf: nil,
|
|
153
160
|
# Truncate base64-encoded image data in the Markdown output
|
|
154
161
|
shorten_base64_images: nil,
|
|
162
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
163
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
164
|
+
# value is 300000ms (5 minutes).
|
|
165
|
+
timeout_ms: nil,
|
|
155
166
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
156
167
|
url_regex: nil,
|
|
157
168
|
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
158
169
|
# navigation
|
|
159
170
|
use_main_content_only: nil,
|
|
171
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
172
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
173
|
+
wait_for_ms: nil,
|
|
160
174
|
request_options: {}
|
|
161
175
|
)
|
|
162
176
|
end
|
|
@@ -168,6 +182,8 @@ module ContextDev
|
|
|
168
182
|
include_frames: T::Boolean,
|
|
169
183
|
max_age_ms: Integer,
|
|
170
184
|
parse_pdf: T::Boolean,
|
|
185
|
+
timeout_ms: Integer,
|
|
186
|
+
wait_for_ms: Integer,
|
|
171
187
|
request_options: ContextDev::RequestOptions::OrHash
|
|
172
188
|
).returns(ContextDev::Models::WebWebScrapeHTMLResponse)
|
|
173
189
|
end
|
|
@@ -184,6 +200,13 @@ module ContextDev
|
|
|
184
200
|
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
185
201
|
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
186
202
|
parse_pdf: nil,
|
|
203
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
204
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
205
|
+
# value is 300000ms (5 minutes).
|
|
206
|
+
timeout_ms: nil,
|
|
207
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
208
|
+
# 30000 (30 seconds).
|
|
209
|
+
wait_for_ms: nil,
|
|
187
210
|
request_options: {}
|
|
188
211
|
)
|
|
189
212
|
end
|
|
@@ -197,6 +220,8 @@ module ContextDev
|
|
|
197
220
|
url: String,
|
|
198
221
|
enrichment: ContextDev::WebWebScrapeImagesParams::Enrichment::OrHash,
|
|
199
222
|
max_age_ms: Integer,
|
|
223
|
+
timeout_ms: Integer,
|
|
224
|
+
wait_for_ms: Integer,
|
|
200
225
|
request_options: ContextDev::RequestOptions::OrHash
|
|
201
226
|
).returns(ContextDev::Models::WebWebScrapeImagesResponse)
|
|
202
227
|
end
|
|
@@ -209,6 +234,13 @@ module ContextDev
|
|
|
209
234
|
# Reuse a cached result this many milliseconds old or newer. Default: 86400000 (1
|
|
210
235
|
# day). Set to 0 to bypass cache. Maximum: 2592000000 (30 days).
|
|
211
236
|
max_age_ms: nil,
|
|
237
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
238
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
239
|
+
# value is 300000ms (5 minutes).
|
|
240
|
+
timeout_ms: nil,
|
|
241
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
242
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
243
|
+
wait_for_ms: nil,
|
|
212
244
|
request_options: {}
|
|
213
245
|
)
|
|
214
246
|
end
|
|
@@ -223,7 +255,9 @@ module ContextDev
|
|
|
223
255
|
max_age_ms: Integer,
|
|
224
256
|
parse_pdf: T::Boolean,
|
|
225
257
|
shorten_base64_images: T::Boolean,
|
|
258
|
+
timeout_ms: Integer,
|
|
226
259
|
use_main_content_only: T::Boolean,
|
|
260
|
+
wait_for_ms: Integer,
|
|
227
261
|
request_options: ContextDev::RequestOptions::OrHash
|
|
228
262
|
).returns(ContextDev::Models::WebWebScrapeMdResponse)
|
|
229
263
|
end
|
|
@@ -247,9 +281,16 @@ module ContextDev
|
|
|
247
281
|
parse_pdf: nil,
|
|
248
282
|
# Shorten base64-encoded image data in the Markdown output
|
|
249
283
|
shorten_base64_images: nil,
|
|
284
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
285
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
286
|
+
# value is 300000ms (5 minutes).
|
|
287
|
+
timeout_ms: nil,
|
|
250
288
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
251
289
|
# and navigation
|
|
252
290
|
use_main_content_only: nil,
|
|
291
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
292
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
293
|
+
wait_for_ms: nil,
|
|
253
294
|
request_options: {}
|
|
254
295
|
)
|
|
255
296
|
end
|
|
@@ -259,6 +300,7 @@ module ContextDev
|
|
|
259
300
|
params(
|
|
260
301
|
domain: String,
|
|
261
302
|
max_links: Integer,
|
|
303
|
+
timeout_ms: Integer,
|
|
262
304
|
url_regex: String,
|
|
263
305
|
request_options: ContextDev::RequestOptions::OrHash
|
|
264
306
|
).returns(ContextDev::Models::WebWebScrapeSitemapResponse)
|
|
@@ -269,6 +311,10 @@ module ContextDev
|
|
|
269
311
|
# Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
|
|
270
312
|
# Minimum is 1, maximum is 100,000.
|
|
271
313
|
max_links: nil,
|
|
314
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
315
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
316
|
+
# value is 300000ms (5 minutes).
|
|
317
|
+
timeout_ms: nil,
|
|
272
318
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
273
319
|
# returned and counted against maxLinks.
|
|
274
320
|
url_regex: nil,
|
|
@@ -7,8 +7,9 @@ module ContextDev
|
|
|
7
7
|
full_screenshot: ContextDev::Models::WebScreenshotParams::full_screenshot,
|
|
8
8
|
max_age_ms: Integer,
|
|
9
9
|
page: ContextDev::Models::WebScreenshotParams::page,
|
|
10
|
-
|
|
11
|
-
viewport: ContextDev::WebScreenshotParams::Viewport
|
|
10
|
+
timeout_ms: Integer,
|
|
11
|
+
viewport: ContextDev::WebScreenshotParams::Viewport,
|
|
12
|
+
wait_for_ms: Integer
|
|
12
13
|
}
|
|
13
14
|
& ContextDev::Internal::Type::request_parameters
|
|
14
15
|
|
|
@@ -40,11 +41,9 @@ module ContextDev
|
|
|
40
41
|
ContextDev::Models::WebScreenshotParams::page
|
|
41
42
|
) -> ContextDev::Models::WebScreenshotParams::page
|
|
42
43
|
|
|
43
|
-
attr_reader
|
|
44
|
+
attr_reader timeout_ms: Integer?
|
|
44
45
|
|
|
45
|
-
def
|
|
46
|
-
ContextDev::Models::WebScreenshotParams::prioritize
|
|
47
|
-
) -> ContextDev::Models::WebScreenshotParams::prioritize
|
|
46
|
+
def timeout_ms=: (Integer) -> Integer
|
|
48
47
|
|
|
49
48
|
attr_reader viewport: ContextDev::WebScreenshotParams::Viewport?
|
|
50
49
|
|
|
@@ -52,14 +51,19 @@ module ContextDev
|
|
|
52
51
|
ContextDev::WebScreenshotParams::Viewport
|
|
53
52
|
) -> ContextDev::WebScreenshotParams::Viewport
|
|
54
53
|
|
|
54
|
+
attr_reader wait_for_ms: Integer?
|
|
55
|
+
|
|
56
|
+
def wait_for_ms=: (Integer) -> Integer
|
|
57
|
+
|
|
55
58
|
def initialize: (
|
|
56
59
|
?direct_url: String,
|
|
57
60
|
?domain: String,
|
|
58
61
|
?full_screenshot: ContextDev::Models::WebScreenshotParams::full_screenshot,
|
|
59
62
|
?max_age_ms: Integer,
|
|
60
63
|
?page: ContextDev::Models::WebScreenshotParams::page,
|
|
61
|
-
?
|
|
64
|
+
?timeout_ms: Integer,
|
|
62
65
|
?viewport: ContextDev::WebScreenshotParams::Viewport,
|
|
66
|
+
?wait_for_ms: Integer,
|
|
63
67
|
?request_options: ContextDev::request_opts
|
|
64
68
|
) -> void
|
|
65
69
|
|
|
@@ -69,8 +73,9 @@ module ContextDev
|
|
|
69
73
|
full_screenshot: ContextDev::Models::WebScreenshotParams::full_screenshot,
|
|
70
74
|
max_age_ms: Integer,
|
|
71
75
|
page: ContextDev::Models::WebScreenshotParams::page,
|
|
72
|
-
|
|
76
|
+
timeout_ms: Integer,
|
|
73
77
|
viewport: ContextDev::WebScreenshotParams::Viewport,
|
|
78
|
+
wait_for_ms: Integer,
|
|
74
79
|
request_options: ContextDev::RequestOptions
|
|
75
80
|
}
|
|
76
81
|
|
|
@@ -110,17 +115,6 @@ module ContextDev
|
|
|
110
115
|
def self?.values: -> ::Array[ContextDev::Models::WebScreenshotParams::page]
|
|
111
116
|
end
|
|
112
117
|
|
|
113
|
-
type prioritize = :speed | :quality
|
|
114
|
-
|
|
115
|
-
module Prioritize
|
|
116
|
-
extend ContextDev::Internal::Type::Enum
|
|
117
|
-
|
|
118
|
-
SPEED: :speed
|
|
119
|
-
QUALITY: :quality
|
|
120
|
-
|
|
121
|
-
def self?.values: -> ::Array[ContextDev::Models::WebScreenshotParams::prioritize]
|
|
122
|
-
end
|
|
123
|
-
|
|
124
118
|
type viewport = { height: Integer, width: Integer }
|
|
125
119
|
|
|
126
120
|
class Viewport < ContextDev::Internal::Type::BaseModel
|