context.dev 1.15.0 → 1.17.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/README.md +1 -1
- data/lib/context_dev/models/ai_extract_product_params.rb +4 -3
- data/lib/context_dev/models/ai_extract_products_params.rb +8 -6
- data/lib/context_dev/models/web_screenshot_params.rb +55 -19
- data/lib/context_dev/models/web_screenshot_response.rb +17 -1
- data/lib/context_dev/models/web_web_crawl_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_html_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_images_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_md_params.rb +20 -1
- data/lib/context_dev/models/web_web_scrape_sitemap_params.rb +11 -1
- data/lib/context_dev/resources/ai.rb +1 -1
- data/lib/context_dev/resources/web.rb +50 -12
- data/lib/context_dev/version.rb +1 -1
- data/rbi/context_dev/models/ai_extract_product_params.rbi +6 -4
- data/rbi/context_dev/models/ai_extract_products_params.rbi +12 -8
- data/rbi/context_dev/models/web_screenshot_params.rbi +85 -42
- data/rbi/context_dev/models/web_screenshot_response.rbi +25 -3
- data/rbi/context_dev/models/web_web_crawl_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_html_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_images_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_md_params.rbi +28 -0
- data/rbi/context_dev/models/web_web_scrape_sitemap_params.rbi +15 -0
- data/rbi/context_dev/resources/ai.rbi +3 -2
- data/rbi/context_dev/resources/web.rbi +59 -5
- data/sig/context_dev/models/web_screenshot_params.rbs +39 -13
- data/sig/context_dev/models/web_screenshot_response.rbs +17 -3
- data/sig/context_dev/models/web_web_crawl_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_html_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_images_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_md_params.rbs +15 -1
- data/sig/context_dev/models/web_web_scrape_sitemap_params.rbs +12 -1
- data/sig/context_dev/resources/web.rbs +13 -1
- metadata +2 -2
|
@@ -27,8 +27,9 @@ module ContextDev
|
|
|
27
27
|
sig { params(max_age_ms: Integer).void }
|
|
28
28
|
attr_writer :max_age_ms
|
|
29
29
|
|
|
30
|
-
# Optional timeout in milliseconds for the request.
|
|
31
|
-
#
|
|
30
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
31
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
32
|
+
# value is 300000ms (5 minutes).
|
|
32
33
|
sig { returns(T.nilable(Integer)) }
|
|
33
34
|
attr_reader :timeout_ms
|
|
34
35
|
|
|
@@ -50,8 +51,9 @@ module ContextDev
|
|
|
50
51
|
# younger than this many milliseconds. Defaults to 7 days (604800000 ms) when
|
|
51
52
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
52
53
|
max_age_ms: nil,
|
|
53
|
-
# Optional timeout in milliseconds for the request.
|
|
54
|
-
#
|
|
54
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
55
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
56
|
+
# value is 300000ms (5 minutes).
|
|
55
57
|
timeout_ms: nil,
|
|
56
58
|
request_options: {}
|
|
57
59
|
)
|
|
@@ -92,8 +92,9 @@ module ContextDev
|
|
|
92
92
|
sig { params(max_products: Integer).void }
|
|
93
93
|
attr_writer :max_products
|
|
94
94
|
|
|
95
|
-
# Optional timeout in milliseconds for the request.
|
|
96
|
-
#
|
|
95
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
96
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
97
|
+
# value is 300000ms (5 minutes).
|
|
97
98
|
sig { returns(T.nilable(Integer)) }
|
|
98
99
|
attr_reader :timeout_ms
|
|
99
100
|
|
|
@@ -117,8 +118,9 @@ module ContextDev
|
|
|
117
118
|
max_age_ms: nil,
|
|
118
119
|
# Maximum number of products to extract.
|
|
119
120
|
max_products: nil,
|
|
120
|
-
# Optional timeout in milliseconds for the request.
|
|
121
|
-
#
|
|
121
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
122
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
123
|
+
# value is 300000ms (5 minutes).
|
|
122
124
|
timeout_ms: nil
|
|
123
125
|
)
|
|
124
126
|
end
|
|
@@ -167,8 +169,9 @@ module ContextDev
|
|
|
167
169
|
sig { params(max_products: Integer).void }
|
|
168
170
|
attr_writer :max_products
|
|
169
171
|
|
|
170
|
-
# Optional timeout in milliseconds for the request.
|
|
171
|
-
#
|
|
172
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
173
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
174
|
+
# value is 300000ms (5 minutes).
|
|
172
175
|
sig { returns(T.nilable(Integer)) }
|
|
173
176
|
attr_reader :timeout_ms
|
|
174
177
|
|
|
@@ -193,8 +196,9 @@ module ContextDev
|
|
|
193
196
|
max_age_ms: nil,
|
|
194
197
|
# Maximum number of products to extract.
|
|
195
198
|
max_products: nil,
|
|
196
|
-
# Optional timeout in milliseconds for the request.
|
|
197
|
-
#
|
|
199
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
200
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
201
|
+
# value is 300000ms (5 minutes).
|
|
198
202
|
timeout_ms: nil
|
|
199
203
|
)
|
|
200
204
|
end
|
|
@@ -47,6 +47,15 @@ module ContextDev
|
|
|
47
47
|
end
|
|
48
48
|
attr_writer :full_screenshot
|
|
49
49
|
|
|
50
|
+
# Return a cached screenshot if a prior screenshot for the same parameters exists
|
|
51
|
+
# and is younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
52
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always capture fresh.
|
|
53
|
+
sig { returns(T.nilable(Integer)) }
|
|
54
|
+
attr_reader :max_age_ms
|
|
55
|
+
|
|
56
|
+
sig { params(max_age_ms: Integer).void }
|
|
57
|
+
attr_writer :max_age_ms
|
|
58
|
+
|
|
50
59
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
51
60
|
# system will scrape the domain's links and use heuristics to find the most
|
|
52
61
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
@@ -60,22 +69,32 @@ module ContextDev
|
|
|
60
69
|
sig { params(page: ContextDev::WebScreenshotParams::Page::OrSymbol).void }
|
|
61
70
|
attr_writer :page
|
|
62
71
|
|
|
63
|
-
# Optional
|
|
64
|
-
#
|
|
65
|
-
#
|
|
66
|
-
sig
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
73
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
74
|
+
# value is 300000ms (5 minutes).
|
|
75
|
+
sig { returns(T.nilable(Integer)) }
|
|
76
|
+
attr_reader :timeout_ms
|
|
77
|
+
|
|
78
|
+
sig { params(timeout_ms: Integer).void }
|
|
79
|
+
attr_writer :timeout_ms
|
|
80
|
+
|
|
81
|
+
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
82
|
+
sig { returns(T.nilable(ContextDev::WebScreenshotParams::Viewport)) }
|
|
83
|
+
attr_reader :viewport
|
|
72
84
|
|
|
73
85
|
sig do
|
|
74
|
-
params(
|
|
75
|
-
prioritize: ContextDev::WebScreenshotParams::Prioritize::OrSymbol
|
|
76
|
-
).void
|
|
86
|
+
params(viewport: ContextDev::WebScreenshotParams::Viewport::OrHash).void
|
|
77
87
|
end
|
|
78
|
-
attr_writer :
|
|
88
|
+
attr_writer :viewport
|
|
89
|
+
|
|
90
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
91
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
92
|
+
# omitted.
|
|
93
|
+
sig { returns(T.nilable(Integer)) }
|
|
94
|
+
attr_reader :wait_for_ms
|
|
95
|
+
|
|
96
|
+
sig { params(wait_for_ms: Integer).void }
|
|
97
|
+
attr_writer :wait_for_ms
|
|
79
98
|
|
|
80
99
|
sig do
|
|
81
100
|
params(
|
|
@@ -83,8 +102,11 @@ module ContextDev
|
|
|
83
102
|
domain: String,
|
|
84
103
|
full_screenshot:
|
|
85
104
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
105
|
+
max_age_ms: Integer,
|
|
86
106
|
page: ContextDev::WebScreenshotParams::Page::OrSymbol,
|
|
87
|
-
|
|
107
|
+
timeout_ms: Integer,
|
|
108
|
+
viewport: ContextDev::WebScreenshotParams::Viewport::OrHash,
|
|
109
|
+
wait_for_ms: Integer,
|
|
88
110
|
request_options: ContextDev::RequestOptions::OrHash
|
|
89
111
|
).returns(T.attached_class)
|
|
90
112
|
end
|
|
@@ -101,16 +123,26 @@ module ContextDev
|
|
|
101
123
|
# screenshot capturing all content. If 'false' or not provided, takes a viewport
|
|
102
124
|
# screenshot (standard browser view).
|
|
103
125
|
full_screenshot: nil,
|
|
126
|
+
# Return a cached screenshot if a prior screenshot for the same parameters exists
|
|
127
|
+
# and is younger than this many milliseconds. Defaults to 1 day (86400000 ms) when
|
|
128
|
+
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always capture fresh.
|
|
129
|
+
max_age_ms: nil,
|
|
104
130
|
# Optional parameter to specify which page type to screenshot. If provided, the
|
|
105
131
|
# system will scrape the domain's links and use heuristics to find the most
|
|
106
132
|
# appropriate URL for the specified page type (30 supported languages). If not
|
|
107
133
|
# provided, screenshots the main domain landing page. Only applicable when using
|
|
108
134
|
# 'domain', not 'directUrl'.
|
|
109
135
|
page: nil,
|
|
110
|
-
# Optional
|
|
111
|
-
#
|
|
112
|
-
#
|
|
113
|
-
|
|
136
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
137
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
138
|
+
# value is 300000ms (5 minutes).
|
|
139
|
+
timeout_ms: nil,
|
|
140
|
+
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
141
|
+
viewport: nil,
|
|
142
|
+
# Optional browser wait time in milliseconds after initial page load before taking
|
|
143
|
+
# the screenshot. Min: 0. Max: 30000 (30 seconds). Defaults to 3000 ms when
|
|
144
|
+
# omitted.
|
|
145
|
+
wait_for_ms: nil,
|
|
114
146
|
request_options: {}
|
|
115
147
|
)
|
|
116
148
|
end
|
|
@@ -122,8 +154,11 @@ module ContextDev
|
|
|
122
154
|
domain: String,
|
|
123
155
|
full_screenshot:
|
|
124
156
|
ContextDev::WebScreenshotParams::FullScreenshot::OrSymbol,
|
|
157
|
+
max_age_ms: Integer,
|
|
125
158
|
page: ContextDev::WebScreenshotParams::Page::OrSymbol,
|
|
126
|
-
|
|
159
|
+
timeout_ms: Integer,
|
|
160
|
+
viewport: ContextDev::WebScreenshotParams::Viewport,
|
|
161
|
+
wait_for_ms: Integer,
|
|
127
162
|
request_options: ContextDev::RequestOptions
|
|
128
163
|
}
|
|
129
164
|
)
|
|
@@ -202,35 +237,43 @@ module ContextDev
|
|
|
202
237
|
end
|
|
203
238
|
end
|
|
204
239
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
# with longer wait times. Defaults to 'quality' if not provided.
|
|
208
|
-
module Prioritize
|
|
209
|
-
extend ContextDev::Internal::Type::Enum
|
|
210
|
-
|
|
211
|
-
TaggedSymbol =
|
|
240
|
+
class Viewport < ContextDev::Internal::Type::BaseModel
|
|
241
|
+
OrHash =
|
|
212
242
|
T.type_alias do
|
|
213
|
-
T.
|
|
243
|
+
T.any(
|
|
244
|
+
ContextDev::WebScreenshotParams::Viewport,
|
|
245
|
+
ContextDev::Internal::AnyHash
|
|
246
|
+
)
|
|
214
247
|
end
|
|
215
|
-
OrSymbol = T.type_alias { T.any(Symbol, String) }
|
|
216
248
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
249
|
+
# Viewport height in pixels.
|
|
250
|
+
sig { returns(T.nilable(Integer)) }
|
|
251
|
+
attr_reader :height
|
|
252
|
+
|
|
253
|
+
sig { params(height: Integer).void }
|
|
254
|
+
attr_writer :height
|
|
255
|
+
|
|
256
|
+
# Viewport width in pixels.
|
|
257
|
+
sig { returns(T.nilable(Integer)) }
|
|
258
|
+
attr_reader :width
|
|
227
259
|
|
|
260
|
+
sig { params(width: Integer).void }
|
|
261
|
+
attr_writer :width
|
|
262
|
+
|
|
263
|
+
# Optional browser viewport dimensions for the screenshot. Defaults to 1920x1080.
|
|
228
264
|
sig do
|
|
229
|
-
|
|
230
|
-
T::Array[ContextDev::WebScreenshotParams::Prioritize::TaggedSymbol]
|
|
231
|
-
)
|
|
265
|
+
params(height: Integer, width: Integer).returns(T.attached_class)
|
|
232
266
|
end
|
|
233
|
-
def self.
|
|
267
|
+
def self.new(
|
|
268
|
+
# Viewport height in pixels.
|
|
269
|
+
height: nil,
|
|
270
|
+
# Viewport width in pixels.
|
|
271
|
+
width: nil
|
|
272
|
+
)
|
|
273
|
+
end
|
|
274
|
+
|
|
275
|
+
sig { override.returns({ height: Integer, width: Integer }) }
|
|
276
|
+
def to_hash
|
|
234
277
|
end
|
|
235
278
|
end
|
|
236
279
|
end
|
|
@@ -25,6 +25,13 @@ module ContextDev
|
|
|
25
25
|
sig { params(domain: String).void }
|
|
26
26
|
attr_writer :domain
|
|
27
27
|
|
|
28
|
+
# Height in pixels of the returned screenshot image
|
|
29
|
+
sig { returns(T.nilable(Integer)) }
|
|
30
|
+
attr_reader :height
|
|
31
|
+
|
|
32
|
+
sig { params(height: Integer).void }
|
|
33
|
+
attr_writer :height
|
|
34
|
+
|
|
28
35
|
# Public URL of the uploaded screenshot image
|
|
29
36
|
sig { returns(T.nilable(String)) }
|
|
30
37
|
attr_reader :screenshot
|
|
@@ -57,14 +64,23 @@ module ContextDev
|
|
|
57
64
|
sig { params(status: String).void }
|
|
58
65
|
attr_writer :status
|
|
59
66
|
|
|
67
|
+
# Width in pixels of the returned screenshot image
|
|
68
|
+
sig { returns(T.nilable(Integer)) }
|
|
69
|
+
attr_reader :width
|
|
70
|
+
|
|
71
|
+
sig { params(width: Integer).void }
|
|
72
|
+
attr_writer :width
|
|
73
|
+
|
|
60
74
|
sig do
|
|
61
75
|
params(
|
|
62
76
|
code: Integer,
|
|
63
77
|
domain: String,
|
|
78
|
+
height: Integer,
|
|
64
79
|
screenshot: String,
|
|
65
80
|
screenshot_type:
|
|
66
81
|
ContextDev::Models::WebScreenshotResponse::ScreenshotType::OrSymbol,
|
|
67
|
-
status: String
|
|
82
|
+
status: String,
|
|
83
|
+
width: Integer
|
|
68
84
|
).returns(T.attached_class)
|
|
69
85
|
end
|
|
70
86
|
def self.new(
|
|
@@ -72,12 +88,16 @@ module ContextDev
|
|
|
72
88
|
code: nil,
|
|
73
89
|
# The normalized domain that was processed
|
|
74
90
|
domain: nil,
|
|
91
|
+
# Height in pixels of the returned screenshot image
|
|
92
|
+
height: nil,
|
|
75
93
|
# Public URL of the uploaded screenshot image
|
|
76
94
|
screenshot: nil,
|
|
77
95
|
# Type of screenshot that was captured
|
|
78
96
|
screenshot_type: nil,
|
|
79
97
|
# Status of the response, e.g., 'ok'
|
|
80
|
-
status: nil
|
|
98
|
+
status: nil,
|
|
99
|
+
# Width in pixels of the returned screenshot image
|
|
100
|
+
width: nil
|
|
81
101
|
)
|
|
82
102
|
end
|
|
83
103
|
|
|
@@ -86,10 +106,12 @@ module ContextDev
|
|
|
86
106
|
{
|
|
87
107
|
code: Integer,
|
|
88
108
|
domain: String,
|
|
109
|
+
height: Integer,
|
|
89
110
|
screenshot: String,
|
|
90
111
|
screenshot_type:
|
|
91
112
|
ContextDev::Models::WebScreenshotResponse::ScreenshotType::TaggedSymbol,
|
|
92
|
-
status: String
|
|
113
|
+
status: String,
|
|
114
|
+
width: Integer
|
|
93
115
|
}
|
|
94
116
|
)
|
|
95
117
|
end
|
|
@@ -85,6 +85,15 @@ module ContextDev
|
|
|
85
85
|
sig { params(shorten_base64_images: T::Boolean).void }
|
|
86
86
|
attr_writer :shorten_base64_images
|
|
87
87
|
|
|
88
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
89
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
90
|
+
# value is 300000ms (5 minutes).
|
|
91
|
+
sig { returns(T.nilable(Integer)) }
|
|
92
|
+
attr_reader :timeout_ms
|
|
93
|
+
|
|
94
|
+
sig { params(timeout_ms: Integer).void }
|
|
95
|
+
attr_writer :timeout_ms
|
|
96
|
+
|
|
88
97
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
89
98
|
sig { returns(T.nilable(String)) }
|
|
90
99
|
attr_reader :url_regex
|
|
@@ -100,6 +109,14 @@ module ContextDev
|
|
|
100
109
|
sig { params(use_main_content_only: T::Boolean).void }
|
|
101
110
|
attr_writer :use_main_content_only
|
|
102
111
|
|
|
112
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
113
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
114
|
+
sig { returns(T.nilable(Integer)) }
|
|
115
|
+
attr_reader :wait_for_ms
|
|
116
|
+
|
|
117
|
+
sig { params(wait_for_ms: Integer).void }
|
|
118
|
+
attr_writer :wait_for_ms
|
|
119
|
+
|
|
103
120
|
sig do
|
|
104
121
|
params(
|
|
105
122
|
url: String,
|
|
@@ -112,8 +129,10 @@ module ContextDev
|
|
|
112
129
|
max_pages: Integer,
|
|
113
130
|
parse_pdf: T::Boolean,
|
|
114
131
|
shorten_base64_images: T::Boolean,
|
|
132
|
+
timeout_ms: Integer,
|
|
115
133
|
url_regex: String,
|
|
116
134
|
use_main_content_only: T::Boolean,
|
|
135
|
+
wait_for_ms: Integer,
|
|
117
136
|
request_options: ContextDev::RequestOptions::OrHash
|
|
118
137
|
).returns(T.attached_class)
|
|
119
138
|
end
|
|
@@ -145,11 +164,18 @@ module ContextDev
|
|
|
145
164
|
parse_pdf: nil,
|
|
146
165
|
# Truncate base64-encoded image data in the Markdown output
|
|
147
166
|
shorten_base64_images: nil,
|
|
167
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
168
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
169
|
+
# value is 300000ms (5 minutes).
|
|
170
|
+
timeout_ms: nil,
|
|
148
171
|
# Regex pattern. Only URLs matching this pattern will be followed and scraped.
|
|
149
172
|
url_regex: nil,
|
|
150
173
|
# Extract only the main content, stripping headers, footers, sidebars, and
|
|
151
174
|
# navigation
|
|
152
175
|
use_main_content_only: nil,
|
|
176
|
+
# Optional browser wait time in milliseconds after initial page load for each
|
|
177
|
+
# crawled page. Min: 0. Max: 30000 (30 seconds).
|
|
178
|
+
wait_for_ms: nil,
|
|
153
179
|
request_options: {}
|
|
154
180
|
)
|
|
155
181
|
end
|
|
@@ -167,8 +193,10 @@ module ContextDev
|
|
|
167
193
|
max_pages: Integer,
|
|
168
194
|
parse_pdf: T::Boolean,
|
|
169
195
|
shorten_base64_images: T::Boolean,
|
|
196
|
+
timeout_ms: Integer,
|
|
170
197
|
url_regex: String,
|
|
171
198
|
use_main_content_only: T::Boolean,
|
|
199
|
+
wait_for_ms: Integer,
|
|
172
200
|
request_options: ContextDev::RequestOptions
|
|
173
201
|
}
|
|
174
202
|
)
|
|
@@ -43,12 +43,31 @@ module ContextDev
|
|
|
43
43
|
sig { params(parse_pdf: T::Boolean).void }
|
|
44
44
|
attr_writer :parse_pdf
|
|
45
45
|
|
|
46
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
47
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
48
|
+
# value is 300000ms (5 minutes).
|
|
49
|
+
sig { returns(T.nilable(Integer)) }
|
|
50
|
+
attr_reader :timeout_ms
|
|
51
|
+
|
|
52
|
+
sig { params(timeout_ms: Integer).void }
|
|
53
|
+
attr_writer :timeout_ms
|
|
54
|
+
|
|
55
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
56
|
+
# 30000 (30 seconds).
|
|
57
|
+
sig { returns(T.nilable(Integer)) }
|
|
58
|
+
attr_reader :wait_for_ms
|
|
59
|
+
|
|
60
|
+
sig { params(wait_for_ms: Integer).void }
|
|
61
|
+
attr_writer :wait_for_ms
|
|
62
|
+
|
|
46
63
|
sig do
|
|
47
64
|
params(
|
|
48
65
|
url: String,
|
|
49
66
|
include_frames: T::Boolean,
|
|
50
67
|
max_age_ms: Integer,
|
|
51
68
|
parse_pdf: T::Boolean,
|
|
69
|
+
timeout_ms: Integer,
|
|
70
|
+
wait_for_ms: Integer,
|
|
52
71
|
request_options: ContextDev::RequestOptions::OrHash
|
|
53
72
|
).returns(T.attached_class)
|
|
54
73
|
end
|
|
@@ -65,6 +84,13 @@ module ContextDev
|
|
|
65
84
|
# returned wrapped in <html><pdf>…</pdf></html>. When false, PDF URLs are skipped
|
|
66
85
|
# and a 400 WEBSITE_ACCESS_ERROR is returned.
|
|
67
86
|
parse_pdf: nil,
|
|
87
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
88
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
89
|
+
# value is 300000ms (5 minutes).
|
|
90
|
+
timeout_ms: nil,
|
|
91
|
+
# Optional browser wait time in milliseconds after initial page load. Min: 0. Max:
|
|
92
|
+
# 30000 (30 seconds).
|
|
93
|
+
wait_for_ms: nil,
|
|
68
94
|
request_options: {}
|
|
69
95
|
)
|
|
70
96
|
end
|
|
@@ -76,6 +102,8 @@ module ContextDev
|
|
|
76
102
|
include_frames: T::Boolean,
|
|
77
103
|
max_age_ms: Integer,
|
|
78
104
|
parse_pdf: T::Boolean,
|
|
105
|
+
timeout_ms: Integer,
|
|
106
|
+
wait_for_ms: Integer,
|
|
79
107
|
request_options: ContextDev::RequestOptions
|
|
80
108
|
}
|
|
81
109
|
)
|
|
@@ -40,11 +40,30 @@ module ContextDev
|
|
|
40
40
|
sig { params(max_age_ms: Integer).void }
|
|
41
41
|
attr_writer :max_age_ms
|
|
42
42
|
|
|
43
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
44
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
45
|
+
# value is 300000ms (5 minutes).
|
|
46
|
+
sig { returns(T.nilable(Integer)) }
|
|
47
|
+
attr_reader :timeout_ms
|
|
48
|
+
|
|
49
|
+
sig { params(timeout_ms: Integer).void }
|
|
50
|
+
attr_writer :timeout_ms
|
|
51
|
+
|
|
52
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
53
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
54
|
+
sig { returns(T.nilable(Integer)) }
|
|
55
|
+
attr_reader :wait_for_ms
|
|
56
|
+
|
|
57
|
+
sig { params(wait_for_ms: Integer).void }
|
|
58
|
+
attr_writer :wait_for_ms
|
|
59
|
+
|
|
43
60
|
sig do
|
|
44
61
|
params(
|
|
45
62
|
url: String,
|
|
46
63
|
enrichment: ContextDev::WebWebScrapeImagesParams::Enrichment::OrHash,
|
|
47
64
|
max_age_ms: Integer,
|
|
65
|
+
timeout_ms: Integer,
|
|
66
|
+
wait_for_ms: Integer,
|
|
48
67
|
request_options: ContextDev::RequestOptions::OrHash
|
|
49
68
|
).returns(T.attached_class)
|
|
50
69
|
end
|
|
@@ -57,6 +76,13 @@ module ContextDev
|
|
|
57
76
|
# Reuse a cached result this many milliseconds old or newer. Default: 86400000 (1
|
|
58
77
|
# day). Set to 0 to bypass cache. Maximum: 2592000000 (30 days).
|
|
59
78
|
max_age_ms: nil,
|
|
79
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
80
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
81
|
+
# value is 300000ms (5 minutes).
|
|
82
|
+
timeout_ms: nil,
|
|
83
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
84
|
+
# collecting images. Min: 0. Max: 30000 (30 seconds).
|
|
85
|
+
wait_for_ms: nil,
|
|
60
86
|
request_options: {}
|
|
61
87
|
)
|
|
62
88
|
end
|
|
@@ -67,6 +93,8 @@ module ContextDev
|
|
|
67
93
|
url: String,
|
|
68
94
|
enrichment: ContextDev::WebWebScrapeImagesParams::Enrichment,
|
|
69
95
|
max_age_ms: Integer,
|
|
96
|
+
timeout_ms: Integer,
|
|
97
|
+
wait_for_ms: Integer,
|
|
70
98
|
request_options: ContextDev::RequestOptions
|
|
71
99
|
}
|
|
72
100
|
)
|
|
@@ -62,6 +62,15 @@ module ContextDev
|
|
|
62
62
|
sig { params(shorten_base64_images: T::Boolean).void }
|
|
63
63
|
attr_writer :shorten_base64_images
|
|
64
64
|
|
|
65
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
66
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
67
|
+
# value is 300000ms (5 minutes).
|
|
68
|
+
sig { returns(T.nilable(Integer)) }
|
|
69
|
+
attr_reader :timeout_ms
|
|
70
|
+
|
|
71
|
+
sig { params(timeout_ms: Integer).void }
|
|
72
|
+
attr_writer :timeout_ms
|
|
73
|
+
|
|
65
74
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
66
75
|
# and navigation
|
|
67
76
|
sig { returns(T.nilable(T::Boolean)) }
|
|
@@ -70,6 +79,14 @@ module ContextDev
|
|
|
70
79
|
sig { params(use_main_content_only: T::Boolean).void }
|
|
71
80
|
attr_writer :use_main_content_only
|
|
72
81
|
|
|
82
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
83
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
84
|
+
sig { returns(T.nilable(Integer)) }
|
|
85
|
+
attr_reader :wait_for_ms
|
|
86
|
+
|
|
87
|
+
sig { params(wait_for_ms: Integer).void }
|
|
88
|
+
attr_writer :wait_for_ms
|
|
89
|
+
|
|
73
90
|
sig do
|
|
74
91
|
params(
|
|
75
92
|
url: String,
|
|
@@ -79,7 +96,9 @@ module ContextDev
|
|
|
79
96
|
max_age_ms: Integer,
|
|
80
97
|
parse_pdf: T::Boolean,
|
|
81
98
|
shorten_base64_images: T::Boolean,
|
|
99
|
+
timeout_ms: Integer,
|
|
82
100
|
use_main_content_only: T::Boolean,
|
|
101
|
+
wait_for_ms: Integer,
|
|
83
102
|
request_options: ContextDev::RequestOptions::OrHash
|
|
84
103
|
).returns(T.attached_class)
|
|
85
104
|
end
|
|
@@ -103,9 +122,16 @@ module ContextDev
|
|
|
103
122
|
parse_pdf: nil,
|
|
104
123
|
# Shorten base64-encoded image data in the Markdown output
|
|
105
124
|
shorten_base64_images: nil,
|
|
125
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
126
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
127
|
+
# value is 300000ms (5 minutes).
|
|
128
|
+
timeout_ms: nil,
|
|
106
129
|
# Extract only the main content of the page, excluding headers, footers, sidebars,
|
|
107
130
|
# and navigation
|
|
108
131
|
use_main_content_only: nil,
|
|
132
|
+
# Optional browser wait time in milliseconds after initial page load before
|
|
133
|
+
# converting the page to Markdown. Min: 0. Max: 30000 (30 seconds).
|
|
134
|
+
wait_for_ms: nil,
|
|
109
135
|
request_options: {}
|
|
110
136
|
)
|
|
111
137
|
end
|
|
@@ -120,7 +146,9 @@ module ContextDev
|
|
|
120
146
|
max_age_ms: Integer,
|
|
121
147
|
parse_pdf: T::Boolean,
|
|
122
148
|
shorten_base64_images: T::Boolean,
|
|
149
|
+
timeout_ms: Integer,
|
|
123
150
|
use_main_content_only: T::Boolean,
|
|
151
|
+
wait_for_ms: Integer,
|
|
124
152
|
request_options: ContextDev::RequestOptions
|
|
125
153
|
}
|
|
126
154
|
)
|
|
@@ -26,6 +26,15 @@ module ContextDev
|
|
|
26
26
|
sig { params(max_links: Integer).void }
|
|
27
27
|
attr_writer :max_links
|
|
28
28
|
|
|
29
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
30
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
31
|
+
# value is 300000ms (5 minutes).
|
|
32
|
+
sig { returns(T.nilable(Integer)) }
|
|
33
|
+
attr_reader :timeout_ms
|
|
34
|
+
|
|
35
|
+
sig { params(timeout_ms: Integer).void }
|
|
36
|
+
attr_writer :timeout_ms
|
|
37
|
+
|
|
29
38
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
30
39
|
# returned and counted against maxLinks.
|
|
31
40
|
sig { returns(T.nilable(String)) }
|
|
@@ -38,6 +47,7 @@ module ContextDev
|
|
|
38
47
|
params(
|
|
39
48
|
domain: String,
|
|
40
49
|
max_links: Integer,
|
|
50
|
+
timeout_ms: Integer,
|
|
41
51
|
url_regex: String,
|
|
42
52
|
request_options: ContextDev::RequestOptions::OrHash
|
|
43
53
|
).returns(T.attached_class)
|
|
@@ -48,6 +58,10 @@ module ContextDev
|
|
|
48
58
|
# Maximum number of links to return from the sitemap crawl. Defaults to 10,000.
|
|
49
59
|
# Minimum is 1, maximum is 100,000.
|
|
50
60
|
max_links: nil,
|
|
61
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
62
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
63
|
+
# value is 300000ms (5 minutes).
|
|
64
|
+
timeout_ms: nil,
|
|
51
65
|
# Optional RE2-compatible regex pattern. Only URLs matching this pattern are
|
|
52
66
|
# returned and counted against maxLinks.
|
|
53
67
|
url_regex: nil,
|
|
@@ -60,6 +74,7 @@ module ContextDev
|
|
|
60
74
|
{
|
|
61
75
|
domain: String,
|
|
62
76
|
max_links: Integer,
|
|
77
|
+
timeout_ms: Integer,
|
|
63
78
|
url_regex: String,
|
|
64
79
|
request_options: ContextDev::RequestOptions
|
|
65
80
|
}
|
|
@@ -48,8 +48,9 @@ module ContextDev
|
|
|
48
48
|
# younger than this many milliseconds. Defaults to 7 days (604800000 ms) when
|
|
49
49
|
# omitted. Max is 30 days (2592000000 ms). Set to 0 to always scrape fresh.
|
|
50
50
|
max_age_ms: nil,
|
|
51
|
-
# Optional timeout in milliseconds for the request.
|
|
52
|
-
#
|
|
51
|
+
# Optional timeout in milliseconds for the request. If the request takes longer
|
|
52
|
+
# than this value, it will be aborted with a 408 status code. Maximum allowed
|
|
53
|
+
# value is 300000ms (5 minutes).
|
|
53
54
|
timeout_ms: nil,
|
|
54
55
|
request_options: {}
|
|
55
56
|
)
|