crawlberg 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/sig/types.rbs CHANGED
@@ -1,530 +1,530 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:06831f8166c6d860691af36ee02b72ae3246568eb2e5c67ed5d11da71d02afeb
2
+ # alef:hash:23d662f17ccee663375ea978facec5b4b691adf30860c73224d58efb602c12d2
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
 
6
6
  module Crawlberg
7
7
 
8
- VERSION: String
8
+ VERSION: String
9
9
 
10
- type json_value = Hash[String, untyped] | Array[untyped] | String | Integer | Float | bool | nil
10
+ type json_value = Hash[String, untyped] | Array[untyped] | String | Integer | Float | bool | nil
11
11
 
12
- class ExtractionMeta
13
- attr_accessor cost: Float?
14
- attr_accessor prompt_tokens: Integer?
15
- attr_accessor completion_tokens: Integer?
16
- attr_accessor model: String?
17
- attr_accessor chunks_processed: Integer?
12
+ class ExtractionMeta
13
+ attr_accessor cost: Float?
14
+ attr_accessor prompt_tokens: Integer?
15
+ attr_accessor completion_tokens: Integer?
16
+ attr_accessor model: String?
17
+ attr_accessor chunks_processed: Integer?
18
18
 
19
19
  def initialize: (?cost: Float, ?prompt_tokens: Integer, ?completion_tokens: Integer, ?model: String, ?chunks_processed: Integer) -> void
20
- end
20
+ end
21
21
 
22
- class ProxyConfig
23
- attr_accessor url: String?
24
- attr_accessor username: String?
25
- attr_accessor password: String?
22
+ class ProxyConfig
23
+ attr_accessor url: String?
24
+ attr_accessor username: String?
25
+ attr_accessor password: String?
26
26
 
27
27
  def initialize: (?url: String, ?username: String, ?password: String) -> void
28
- end
28
+ end
29
29
 
30
- class ContentConfig
31
- attr_accessor output_format: String?
32
- attr_accessor preprocessing_preset: String?
33
- attr_accessor remove_navigation: bool?
34
- attr_accessor remove_forms: bool?
30
+ class ContentConfig
31
+ attr_accessor output_format: String?
32
+ attr_accessor preprocessing_preset: String?
33
+ attr_accessor remove_navigation: bool?
34
+ attr_accessor remove_forms: bool?
35
35
  attr_accessor strip_tags: Array[String]?
36
36
  attr_accessor preserve_tags: Array[String]?
37
37
  attr_accessor exclude_selectors: Array[String]?
38
- attr_accessor skip_images: bool?
39
- attr_accessor max_depth: Integer?
40
- attr_accessor wrap: bool?
41
- attr_accessor wrap_width: Integer?
42
- attr_accessor include_document_structure: bool?
38
+ attr_accessor skip_images: bool?
39
+ attr_accessor max_depth: Integer?
40
+ attr_accessor wrap: bool?
41
+ attr_accessor wrap_width: Integer?
42
+ attr_accessor include_document_structure: bool?
43
43
 
44
- def initialize: (?output_format: String, ?preprocessing_preset: String, ?remove_navigation: bool, ?remove_forms: bool, ?strip_tags: Array[String], ?preserve_tags: Array[String], ?exclude_selectors: Array[String], ?skip_images: bool, ?max_depth: Integer, ?wrap: bool, ?wrap_width: Integer, ?include_document_structure: bool) -> void
44
+ def initialize: (?output_format: String, ?preprocessing_preset: String, ?remove_navigation: bool, ?remove_forms: bool, ?strip_tags: Array[String], ?preserve_tags: Array[String], ?exclude_selectors: Array[String], ?skip_images: bool, ?max_depth: Integer, ?wrap: bool, ?wrap_width: Integer, ?include_document_structure: bool) -> void
45
45
  def self.default: () -> ContentConfig
46
- end
47
-
48
- class BrowserConfig
49
- attr_accessor mode: BrowserMode?
50
- attr_accessor backend: BrowserBackend?
51
- attr_accessor endpoint: String?
52
- attr_accessor timeout: Integer?
53
- attr_accessor wait: BrowserWait?
54
- attr_accessor wait_selector: String?
55
- attr_accessor extra_wait: Integer?
56
- attr_accessor proxy: ProxyConfig?
46
+ end
47
+
48
+ class BrowserConfig
49
+ attr_accessor mode: BrowserMode?
50
+ attr_accessor backend: BrowserBackend?
51
+ attr_accessor endpoint: String?
52
+ attr_accessor timeout: Integer?
53
+ attr_accessor wait: BrowserWait?
54
+ attr_accessor wait_selector: String?
55
+ attr_accessor extra_wait: Integer?
56
+ attr_accessor proxy: ProxyConfig?
57
57
  attr_accessor block_url_patterns: Array[String]?
58
- attr_accessor eval_script: String?
59
- attr_accessor robots_user_agent: String?
60
- attr_accessor capture_network_events: bool?
61
- attr_accessor session_affinity: bool?
58
+ attr_accessor eval_script: String?
59
+ attr_accessor robots_user_agent: String?
60
+ attr_accessor capture_network_events: bool?
61
+ attr_accessor session_affinity: bool?
62
62
 
63
- def initialize: (?mode: BrowserMode, ?backend: BrowserBackend, ?endpoint: String, ?timeout: Integer, ?wait: BrowserWait, ?wait_selector: String, ?extra_wait: Integer, ?proxy: ProxyConfig, ?block_url_patterns: Array[String], ?eval_script: String, ?robots_user_agent: String, ?capture_network_events: bool, ?session_affinity: bool) -> void
63
+ def initialize: (?mode: BrowserMode, ?backend: BrowserBackend, ?endpoint: String, ?timeout: Integer, ?wait: BrowserWait, ?wait_selector: String, ?extra_wait: Integer, ?proxy: ProxyConfig, ?block_url_patterns: Array[String], ?eval_script: String, ?robots_user_agent: String, ?capture_network_events: bool, ?session_affinity: bool) -> void
64
64
  def self.default: () -> BrowserConfig
65
- end
66
-
67
- class CrawlConfig
68
- attr_accessor max_depth: Integer?
69
- attr_accessor max_pages: Integer?
70
- attr_accessor max_concurrent: Integer?
71
- attr_accessor respect_robots_txt: bool?
72
- attr_accessor soft_http_errors: bool?
73
- attr_accessor user_agent: String?
74
- attr_accessor stay_on_domain: bool?
75
- attr_accessor allow_subdomains: bool?
65
+ end
66
+
67
+ class CrawlConfig
68
+ attr_accessor max_depth: Integer?
69
+ attr_accessor max_pages: Integer?
70
+ attr_accessor max_concurrent: Integer?
71
+ attr_accessor respect_robots_txt: bool?
72
+ attr_accessor soft_http_errors: bool?
73
+ attr_accessor user_agent: String?
74
+ attr_accessor stay_on_domain: bool?
75
+ attr_accessor allow_subdomains: bool?
76
76
  attr_accessor include_paths: Array[String]?
77
77
  attr_accessor exclude_paths: Array[String]?
78
78
  attr_accessor custom_headers: Hash[String, String]?
79
- attr_accessor request_timeout: Integer?
80
- attr_accessor rate_limit_ms: Integer?
81
- attr_accessor max_redirects: Integer?
82
- attr_accessor retry_count: Integer?
79
+ attr_accessor request_timeout: Integer?
80
+ attr_accessor rate_limit_ms: Integer?
81
+ attr_accessor max_redirects: Integer?
82
+ attr_accessor retry_count: Integer?
83
83
  attr_accessor retry_codes: Array[Integer]?
84
- attr_accessor cookies_enabled: bool?
85
- attr_accessor auth: AuthConfig?
86
- attr_accessor max_body_size: Integer?
84
+ attr_accessor cookies_enabled: bool?
85
+ attr_accessor auth: AuthConfig?
86
+ attr_accessor max_body_size: Integer?
87
87
  attr_accessor remove_tags: Array[String]?
88
- attr_accessor content: ContentConfig?
89
- attr_accessor map_limit: Integer?
90
- attr_accessor map_search: String?
91
- attr_accessor download_assets: bool?
88
+ attr_accessor content: ContentConfig?
89
+ attr_accessor map_limit: Integer?
90
+ attr_accessor map_search: String?
91
+ attr_accessor download_assets: bool?
92
92
  attr_accessor asset_types: Array[AssetCategory]?
93
- attr_accessor max_asset_size: Integer?
94
- attr_accessor browser: BrowserConfig?
95
- attr_accessor proxy: ProxyConfig?
93
+ attr_accessor max_asset_size: Integer?
94
+ attr_accessor browser: BrowserConfig?
95
+ attr_accessor proxy: ProxyConfig?
96
96
  attr_accessor user_agents: Array[String]?
97
- attr_accessor capture_screenshot: bool?
98
- attr_accessor follow_document_urls: bool?
99
- attr_accessor document_url_depth: Integer?
100
- attr_accessor download_documents: bool?
101
- attr_accessor document_max_size: Integer?
97
+ attr_accessor capture_screenshot: bool?
98
+ attr_accessor follow_document_urls: bool?
99
+ attr_accessor document_url_depth: Integer?
100
+ attr_accessor download_documents: bool?
101
+ attr_accessor document_max_size: Integer?
102
102
  attr_accessor document_mime_types: Array[String]?
103
- attr_accessor warc_output: String?
104
- attr_accessor browser_profile: String?
105
- attr_accessor save_browser_profile: bool?
106
- attr_accessor ssrf: SsrfPolicy?
103
+ attr_accessor warc_output: String?
104
+ attr_accessor browser_profile: String?
105
+ attr_accessor save_browser_profile: bool?
106
+ attr_accessor ssrf: SsrfPolicy?
107
107
 
108
- def initialize: (?max_depth: Integer, ?max_pages: Integer, ?max_concurrent: Integer, ?respect_robots_txt: bool, ?soft_http_errors: bool, ?user_agent: String, ?stay_on_domain: bool, ?allow_subdomains: bool, ?include_paths: Array[String], ?exclude_paths: Array[String], ?custom_headers: Hash[String, String], ?request_timeout: Integer, ?rate_limit_ms: Integer, ?max_redirects: Integer, ?retry_count: Integer, ?retry_codes: Array[Integer], ?cookies_enabled: bool, ?auth: AuthConfig, ?max_body_size: Integer, ?remove_tags: Array[String], ?content: ContentConfig, ?map_limit: Integer, ?map_search: String, ?download_assets: bool, ?asset_types: Array[AssetCategory], ?max_asset_size: Integer, ?browser: BrowserConfig, ?proxy: ProxyConfig, ?user_agents: Array[String], ?capture_screenshot: bool, ?follow_document_urls: bool, ?document_url_depth: Integer, ?download_documents: bool, ?document_max_size: Integer, ?document_mime_types: Array[String], ?warc_output: String, ?browser_profile: String, ?save_browser_profile: bool, ?ssrf: SsrfPolicy) -> void
108
+ def initialize: (?max_depth: Integer, ?max_pages: Integer, ?max_concurrent: Integer, ?respect_robots_txt: bool, ?soft_http_errors: bool, ?user_agent: String, ?stay_on_domain: bool, ?allow_subdomains: bool, ?include_paths: Array[String], ?exclude_paths: Array[String], ?custom_headers: Hash[String, String], ?request_timeout: Integer, ?rate_limit_ms: Integer, ?max_redirects: Integer, ?retry_count: Integer, ?retry_codes: Array[Integer], ?cookies_enabled: bool, ?auth: AuthConfig, ?max_body_size: Integer, ?remove_tags: Array[String], ?content: ContentConfig, ?map_limit: Integer, ?map_search: String, ?download_assets: bool, ?asset_types: Array[AssetCategory], ?max_asset_size: Integer, ?browser: BrowserConfig, ?proxy: ProxyConfig, ?user_agents: Array[String], ?capture_screenshot: bool, ?follow_document_urls: bool, ?document_url_depth: Integer, ?download_documents: bool, ?document_max_size: Integer, ?document_mime_types: Array[String], ?warc_output: String, ?browser_profile: String, ?save_browser_profile: bool, ?ssrf: SsrfPolicy) -> void
109
109
  def validate: () -> void
110
110
  def self.default: () -> CrawlConfig
111
- end
111
+ end
112
112
 
113
- class BrowserExtras
114
- attr_accessor eval_result: json_value?
113
+ class BrowserExtras
114
+ attr_accessor eval_result: json_value?
115
115
  attr_accessor network_events: Array[ResponseMeta]?
116
116
  attr_accessor cookies: Array[CookieInfo]?
117
117
 
118
- def initialize: (?eval_result: json_value, ?network_events: Array[ResponseMeta], ?cookies: Array[CookieInfo]) -> void
119
- end
118
+ def initialize: (?eval_result: json_value, ?network_events: Array[ResponseMeta], ?cookies: Array[CookieInfo]) -> void
119
+ end
120
120
 
121
- class DownloadedDocument
122
- attr_accessor url: String?
123
- attr_accessor mime_type: String?
124
- attr_accessor size: Integer?
125
- attr_accessor filename: String?
126
- attr_accessor content_hash: String?
121
+ class DownloadedDocument
122
+ attr_accessor url: String?
123
+ attr_accessor mime_type: String?
124
+ attr_accessor size: Integer?
125
+ attr_accessor filename: String?
126
+ attr_accessor content_hash: String?
127
127
  attr_accessor headers: Hash[String, String]?
128
128
 
129
- def initialize: (?url: String, ?mime_type: String, ?size: Integer, ?filename: String, ?content_hash: String, ?headers: Hash[String, String]) -> void
130
- end
129
+ def initialize: (?url: String, ?mime_type: String, ?size: Integer, ?filename: String, ?content_hash: String, ?headers: Hash[String, String]) -> void
130
+ end
131
131
 
132
- class InteractionResult
132
+ class InteractionResult
133
133
  attr_accessor action_results: Array[ActionResult]?
134
- attr_accessor final_html: String?
135
- attr_accessor final_url: String?
134
+ attr_accessor final_html: String?
135
+ attr_accessor final_url: String?
136
136
 
137
- def initialize: (?action_results: Array[ActionResult], ?final_html: String, ?final_url: String) -> void
138
- end
137
+ def initialize: (?action_results: Array[ActionResult], ?final_html: String, ?final_url: String) -> void
138
+ end
139
139
 
140
- class ActionResult
141
- attr_accessor action_index: Integer?
142
- attr_accessor action_type: String?
143
- attr_accessor success: bool?
144
- attr_accessor data: json_value?
145
- attr_accessor error: String?
140
+ class ActionResult
141
+ attr_accessor action_index: Integer?
142
+ attr_accessor action_type: String?
143
+ attr_accessor success: bool?
144
+ attr_accessor data: json_value?
145
+ attr_accessor error: String?
146
146
 
147
147
  def initialize: (?action_index: Integer, ?action_type: String, ?success: bool, ?data: json_value, ?error: String) -> void
148
- end
149
-
150
- class ScrapeResult
151
- attr_accessor status_code: Integer?
152
- attr_accessor final_url: String?
153
- attr_accessor content_type: String?
154
- attr_accessor html: String?
155
- attr_accessor body_size: Integer?
156
- attr_accessor metadata: PageMetadata?
148
+ end
149
+
150
+ class ScrapeResult
151
+ attr_accessor status_code: Integer?
152
+ attr_accessor final_url: String?
153
+ attr_accessor content_type: String?
154
+ attr_accessor html: String?
155
+ attr_accessor body_size: Integer?
156
+ attr_accessor metadata: PageMetadata?
157
157
  attr_accessor links: Array[LinkInfo]?
158
158
  attr_accessor images: Array[ImageInfo]?
159
159
  attr_accessor feeds: Array[FeedInfo]?
160
160
  attr_accessor json_ld: Array[JsonLdEntry]?
161
- attr_accessor is_allowed: bool?
162
- attr_accessor crawl_delay: Integer?
163
- attr_accessor noindex_detected: bool?
164
- attr_accessor nofollow_detected: bool?
165
- attr_accessor x_robots_tag: String?
166
- attr_accessor is_pdf: bool?
167
- attr_accessor was_skipped: bool?
168
- attr_accessor detected_charset: String?
169
- attr_accessor auth_header_sent: bool?
170
- attr_accessor response_meta: ResponseMeta?
161
+ attr_accessor is_allowed: bool?
162
+ attr_accessor crawl_delay: Integer?
163
+ attr_accessor noindex_detected: bool?
164
+ attr_accessor nofollow_detected: bool?
165
+ attr_accessor x_robots_tag: String?
166
+ attr_accessor is_pdf: bool?
167
+ attr_accessor was_skipped: bool?
168
+ attr_accessor detected_charset: String?
169
+ attr_accessor auth_header_sent: bool?
170
+ attr_accessor response_meta: ResponseMeta?
171
171
  attr_accessor assets: Array[DownloadedAsset]?
172
- attr_accessor js_render_hint: bool?
173
- attr_accessor browser_used: bool?
174
- attr_accessor markdown: MarkdownResult?
175
- attr_accessor extracted_data: json_value?
176
- attr_accessor extraction_meta: ExtractionMeta?
177
- attr_accessor downloaded_document: DownloadedDocument?
178
- attr_accessor browser: BrowserExtras?
179
-
180
- def initialize: (?status_code: Integer, ?final_url: String, ?content_type: String, ?html: String, ?body_size: Integer, ?metadata: PageMetadata, ?links: Array[LinkInfo], ?images: Array[ImageInfo], ?feeds: Array[FeedInfo], ?json_ld: Array[JsonLdEntry], ?is_allowed: bool, ?crawl_delay: Integer, ?noindex_detected: bool, ?nofollow_detected: bool, ?x_robots_tag: String, ?is_pdf: bool, ?was_skipped: bool, ?detected_charset: String, ?auth_header_sent: bool, ?response_meta: ResponseMeta, ?assets: Array[DownloadedAsset], ?js_render_hint: bool, ?browser_used: bool, ?markdown: MarkdownResult, ?extracted_data: json_value, ?extraction_meta: ExtractionMeta, ?downloaded_document: DownloadedDocument, ?browser: BrowserExtras) -> void
181
- end
182
-
183
- class CrawlPageResult
184
- attr_accessor url: String?
185
- attr_accessor normalized_url: String?
186
- attr_accessor status_code: Integer?
187
- attr_accessor content_type: String?
188
- attr_accessor html: String?
189
- attr_accessor body_size: Integer?
190
- attr_accessor metadata: PageMetadata?
172
+ attr_accessor js_render_hint: bool?
173
+ attr_accessor browser_used: bool?
174
+ attr_accessor markdown: MarkdownResult?
175
+ attr_accessor extracted_data: json_value?
176
+ attr_accessor extraction_meta: ExtractionMeta?
177
+ attr_accessor downloaded_document: DownloadedDocument?
178
+ attr_accessor browser: BrowserExtras?
179
+
180
+ def initialize: (?status_code: Integer, ?final_url: String, ?content_type: String, ?html: String, ?body_size: Integer, ?metadata: PageMetadata, ?links: Array[LinkInfo], ?images: Array[ImageInfo], ?feeds: Array[FeedInfo], ?json_ld: Array[JsonLdEntry], ?is_allowed: bool, ?crawl_delay: Integer, ?noindex_detected: bool, ?nofollow_detected: bool, ?x_robots_tag: String, ?is_pdf: bool, ?was_skipped: bool, ?detected_charset: String, ?auth_header_sent: bool, ?response_meta: ResponseMeta, ?assets: Array[DownloadedAsset], ?js_render_hint: bool, ?browser_used: bool, ?markdown: MarkdownResult, ?extracted_data: json_value, ?extraction_meta: ExtractionMeta, ?downloaded_document: DownloadedDocument, ?browser: BrowserExtras) -> void
181
+ end
182
+
183
+ class CrawlPageResult
184
+ attr_accessor url: String?
185
+ attr_accessor normalized_url: String?
186
+ attr_accessor status_code: Integer?
187
+ attr_accessor content_type: String?
188
+ attr_accessor html: String?
189
+ attr_accessor body_size: Integer?
190
+ attr_accessor metadata: PageMetadata?
191
191
  attr_accessor links: Array[LinkInfo]?
192
192
  attr_accessor images: Array[ImageInfo]?
193
193
  attr_accessor feeds: Array[FeedInfo]?
194
194
  attr_accessor json_ld: Array[JsonLdEntry]?
195
- attr_accessor depth: Integer?
196
- attr_accessor stayed_on_domain: bool?
197
- attr_accessor was_skipped: bool?
198
- attr_accessor is_pdf: bool?
199
- attr_accessor detected_charset: String?
200
- attr_accessor markdown: MarkdownResult?
201
- attr_accessor extracted_data: json_value?
202
- attr_accessor extraction_meta: ExtractionMeta?
203
- attr_accessor downloaded_document: DownloadedDocument?
204
- attr_accessor browser_used: bool?
205
-
206
- def initialize: (?url: String, ?normalized_url: String, ?status_code: Integer, ?content_type: String, ?html: String, ?body_size: Integer, ?metadata: PageMetadata, ?links: Array[LinkInfo], ?images: Array[ImageInfo], ?feeds: Array[FeedInfo], ?json_ld: Array[JsonLdEntry], ?depth: Integer, ?stayed_on_domain: bool, ?was_skipped: bool, ?is_pdf: bool, ?detected_charset: String, ?markdown: MarkdownResult, ?extracted_data: json_value, ?extraction_meta: ExtractionMeta, ?downloaded_document: DownloadedDocument, ?browser_used: bool) -> void
207
- end
208
-
209
- class CrawlResult
195
+ attr_accessor depth: Integer?
196
+ attr_accessor stayed_on_domain: bool?
197
+ attr_accessor was_skipped: bool?
198
+ attr_accessor is_pdf: bool?
199
+ attr_accessor detected_charset: String?
200
+ attr_accessor markdown: MarkdownResult?
201
+ attr_accessor extracted_data: json_value?
202
+ attr_accessor extraction_meta: ExtractionMeta?
203
+ attr_accessor downloaded_document: DownloadedDocument?
204
+ attr_accessor browser_used: bool?
205
+
206
+ def initialize: (?url: String, ?normalized_url: String, ?status_code: Integer, ?content_type: String, ?html: String, ?body_size: Integer, ?metadata: PageMetadata, ?links: Array[LinkInfo], ?images: Array[ImageInfo], ?feeds: Array[FeedInfo], ?json_ld: Array[JsonLdEntry], ?depth: Integer, ?stayed_on_domain: bool, ?was_skipped: bool, ?is_pdf: bool, ?detected_charset: String, ?markdown: MarkdownResult, ?extracted_data: json_value, ?extraction_meta: ExtractionMeta, ?downloaded_document: DownloadedDocument, ?browser_used: bool) -> void
207
+ end
208
+
209
+ class CrawlResult
210
210
  attr_accessor pages: Array[CrawlPageResult]?
211
- attr_accessor final_url: String?
212
- attr_accessor redirect_count: Integer?
213
- attr_accessor was_skipped: bool?
214
- attr_accessor error: String?
211
+ attr_accessor final_url: String?
212
+ attr_accessor redirect_count: Integer?
213
+ attr_accessor was_skipped: bool?
214
+ attr_accessor error: String?
215
215
  attr_accessor cookies: Array[CookieInfo]?
216
- attr_accessor stayed_on_domain: bool?
217
- attr_accessor browser_used: bool?
216
+ attr_accessor stayed_on_domain: bool?
217
+ attr_accessor browser_used: bool?
218
218
 
219
- def initialize: (?pages: Array[CrawlPageResult], ?final_url: String, ?redirect_count: Integer, ?was_skipped: bool, ?error: String, ?cookies: Array[CookieInfo], ?stayed_on_domain: bool, ?browser_used: bool) -> void
219
+ def initialize: (?pages: Array[CrawlPageResult], ?final_url: String, ?redirect_count: Integer, ?was_skipped: bool, ?error: String, ?cookies: Array[CookieInfo], ?stayed_on_domain: bool, ?browser_used: bool) -> void
220
220
  def unique_normalized_urls: () -> Integer
221
- end
221
+ end
222
222
 
223
- class SitemapUrl
224
- attr_accessor url: String?
225
- attr_accessor lastmod: String?
226
- attr_accessor changefreq: String?
227
- attr_accessor priority: String?
223
+ class SitemapUrl
224
+ attr_accessor url: String?
225
+ attr_accessor lastmod: String?
226
+ attr_accessor changefreq: String?
227
+ attr_accessor priority: String?
228
228
 
229
229
  def initialize: (?url: String, ?lastmod: String, ?changefreq: String, ?priority: String) -> void
230
- end
230
+ end
231
231
 
232
- class MapResult
232
+ class MapResult
233
233
  attr_accessor urls: Array[SitemapUrl]?
234
234
 
235
- def initialize: (?urls: Array[SitemapUrl]) -> void
236
- end
235
+ def initialize: (?urls: Array[SitemapUrl]) -> void
236
+ end
237
237
 
238
- class MarkdownResult
239
- attr_accessor content: String?
240
- attr_accessor document_structure: json_value?
238
+ class MarkdownResult
239
+ attr_accessor content: String?
240
+ attr_accessor document_structure: json_value?
241
241
  attr_accessor tables: Array[json_value]?
242
242
  attr_accessor warnings: Array[String]?
243
- attr_accessor citations: bool?
244
- attr_accessor fit_content: String?
243
+ attr_accessor citations: bool?
244
+ attr_accessor fit_content: String?
245
245
 
246
- def initialize: (?content: String, ?document_structure: json_value, ?tables: Array[json_value], ?warnings: Array[String], ?citations: bool, ?fit_content: String) -> void
247
- end
246
+ def initialize: (?content: String, ?document_structure: json_value, ?tables: Array[json_value], ?warnings: Array[String], ?citations: bool, ?fit_content: String) -> void
247
+ end
248
248
 
249
- class LinkInfo
250
- attr_accessor url: String?
251
- attr_accessor text: String?
252
- attr_accessor link_type: LinkType?
253
- attr_accessor rel: String?
254
- attr_accessor nofollow: bool?
249
+ class LinkInfo
250
+ attr_accessor url: String?
251
+ attr_accessor text: String?
252
+ attr_accessor link_type: LinkType?
253
+ attr_accessor rel: String?
254
+ attr_accessor nofollow: bool?
255
255
 
256
256
  def initialize: (?url: String, ?text: String, ?link_type: LinkType, ?rel: String, ?nofollow: bool) -> void
257
- end
257
+ end
258
258
 
259
- class ImageInfo
260
- attr_accessor url: String?
261
- attr_accessor alt: String?
262
- attr_accessor width: Integer?
263
- attr_accessor height: Integer?
264
- attr_accessor source: ImageSource?
259
+ class ImageInfo
260
+ attr_accessor url: String?
261
+ attr_accessor alt: String?
262
+ attr_accessor width: Integer?
263
+ attr_accessor height: Integer?
264
+ attr_accessor source: ImageSource?
265
265
 
266
266
  def initialize: (?url: String, ?alt: String, ?width: Integer, ?height: Integer, ?source: ImageSource) -> void
267
- end
267
+ end
268
268
 
269
- class FeedInfo
270
- attr_accessor url: String?
271
- attr_accessor title: String?
272
- attr_accessor feed_type: FeedType?
269
+ class FeedInfo
270
+ attr_accessor url: String?
271
+ attr_accessor title: String?
272
+ attr_accessor feed_type: FeedType?
273
273
 
274
274
  def initialize: (?url: String, ?title: String, ?feed_type: FeedType) -> void
275
- end
275
+ end
276
276
 
277
- class JsonLdEntry
278
- attr_accessor schema_type: String?
279
- attr_accessor name: String?
280
- attr_accessor raw: String?
277
+ class JsonLdEntry
278
+ attr_accessor schema_type: String?
279
+ attr_accessor name: String?
280
+ attr_accessor raw: String?
281
281
 
282
282
  def initialize: (?schema_type: String, ?name: String, ?raw: String) -> void
283
- end
283
+ end
284
284
 
285
- class CookieInfo
286
- attr_accessor name: String?
287
- attr_accessor value: String?
288
- attr_accessor domain: String?
289
- attr_accessor path: String?
285
+ class CookieInfo
286
+ attr_accessor name: String?
287
+ attr_accessor value: String?
288
+ attr_accessor domain: String?
289
+ attr_accessor path: String?
290
290
 
291
291
  def initialize: (?name: String, ?value: String, ?domain: String, ?path: String) -> void
292
- end
292
+ end
293
293
 
294
- class DownloadedAsset
295
- attr_accessor url: String?
296
- attr_accessor content_hash: String?
297
- attr_accessor mime_type: String?
298
- attr_accessor size: Integer?
299
- attr_accessor asset_category: AssetCategory?
300
- attr_accessor html_tag: String?
294
+ class DownloadedAsset
295
+ attr_accessor url: String?
296
+ attr_accessor content_hash: String?
297
+ attr_accessor mime_type: String?
298
+ attr_accessor size: Integer?
299
+ attr_accessor asset_category: AssetCategory?
300
+ attr_accessor html_tag: String?
301
301
 
302
302
  def initialize: (?url: String, ?content_hash: String, ?mime_type: String, ?size: Integer, ?asset_category: AssetCategory, ?html_tag: String) -> void
303
- end
303
+ end
304
304
 
305
- class ArticleMetadata
306
- attr_accessor published_time: String?
307
- attr_accessor modified_time: String?
308
- attr_accessor author: String?
309
- attr_accessor section: String?
305
+ class ArticleMetadata
306
+ attr_accessor published_time: String?
307
+ attr_accessor modified_time: String?
308
+ attr_accessor author: String?
309
+ attr_accessor section: String?
310
310
  attr_accessor tags: Array[String]?
311
311
 
312
- def initialize: (?published_time: String, ?modified_time: String, ?author: String, ?section: String, ?tags: Array[String]) -> void
313
- end
312
+ def initialize: (?published_time: String, ?modified_time: String, ?author: String, ?section: String, ?tags: Array[String]) -> void
313
+ end
314
314
 
315
- class HreflangEntry
316
- attr_accessor lang: String?
317
- attr_accessor url: String?
315
+ class HreflangEntry
316
+ attr_accessor lang: String?
317
+ attr_accessor url: String?
318
318
 
319
319
  def initialize: (?lang: String, ?url: String) -> void
320
- end
320
+ end
321
321
 
322
- class FaviconInfo
323
- attr_accessor url: String?
324
- attr_accessor rel: String?
325
- attr_accessor sizes: String?
326
- attr_accessor mime_type: String?
322
+ class FaviconInfo
323
+ attr_accessor url: String?
324
+ attr_accessor rel: String?
325
+ attr_accessor sizes: String?
326
+ attr_accessor mime_type: String?
327
327
 
328
328
  def initialize: (?url: String, ?rel: String, ?sizes: String, ?mime_type: String) -> void
329
- end
329
+ end
330
330
 
331
- class HeadingInfo
332
- attr_accessor level: Integer?
333
- attr_accessor text: String?
331
+ class HeadingInfo
332
+ attr_accessor level: Integer?
333
+ attr_accessor text: String?
334
334
 
335
335
  def initialize: (?level: Integer, ?text: String) -> void
336
- end
336
+ end
337
337
 
338
- class ResponseMeta
339
- attr_accessor etag: String?
340
- attr_accessor last_modified: String?
341
- attr_accessor cache_control: String?
342
- attr_accessor server: String?
343
- attr_accessor x_powered_by: String?
344
- attr_accessor content_language: String?
345
- attr_accessor content_encoding: String?
338
+ class ResponseMeta
339
+ attr_accessor etag: String?
340
+ attr_accessor last_modified: String?
341
+ attr_accessor cache_control: String?
342
+ attr_accessor server: String?
343
+ attr_accessor x_powered_by: String?
344
+ attr_accessor content_language: String?
345
+ attr_accessor content_encoding: String?
346
346
 
347
347
  def initialize: (?etag: String, ?last_modified: String, ?cache_control: String, ?server: String, ?x_powered_by: String, ?content_language: String, ?content_encoding: String) -> void
348
- end
349
-
350
- class PageMetadata
351
- attr_accessor title: String?
352
- attr_accessor description: String?
353
- attr_accessor canonical_url: String?
354
- attr_accessor keywords: String?
355
- attr_accessor author: String?
356
- attr_accessor viewport: String?
357
- attr_accessor theme_color: String?
358
- attr_accessor generator: String?
359
- attr_accessor robots: String?
360
- attr_accessor html_lang: String?
361
- attr_accessor html_dir: String?
362
- attr_accessor og_title: String?
363
- attr_accessor og_type: String?
364
- attr_accessor og_image: String?
365
- attr_accessor og_description: String?
366
- attr_accessor og_url: String?
367
- attr_accessor og_site_name: String?
368
- attr_accessor og_locale: String?
369
- attr_accessor og_video: String?
370
- attr_accessor og_audio: String?
348
+ end
349
+
350
+ class PageMetadata
351
+ attr_accessor title: String?
352
+ attr_accessor description: String?
353
+ attr_accessor canonical_url: String?
354
+ attr_accessor keywords: String?
355
+ attr_accessor author: String?
356
+ attr_accessor viewport: String?
357
+ attr_accessor theme_color: String?
358
+ attr_accessor generator: String?
359
+ attr_accessor robots: String?
360
+ attr_accessor html_lang: String?
361
+ attr_accessor html_dir: String?
362
+ attr_accessor og_title: String?
363
+ attr_accessor og_type: String?
364
+ attr_accessor og_image: String?
365
+ attr_accessor og_description: String?
366
+ attr_accessor og_url: String?
367
+ attr_accessor og_site_name: String?
368
+ attr_accessor og_locale: String?
369
+ attr_accessor og_video: String?
370
+ attr_accessor og_audio: String?
371
371
  attr_accessor og_locale_alternates: Array[String]?
372
- attr_accessor twitter_card: String?
373
- attr_accessor twitter_title: String?
374
- attr_accessor twitter_description: String?
375
- attr_accessor twitter_image: String?
376
- attr_accessor twitter_site: String?
377
- attr_accessor twitter_creator: String?
378
- attr_accessor dc_title: String?
379
- attr_accessor dc_creator: String?
380
- attr_accessor dc_subject: String?
381
- attr_accessor dc_description: String?
382
- attr_accessor dc_publisher: String?
383
- attr_accessor dc_date: String?
384
- attr_accessor dc_type: String?
385
- attr_accessor dc_format: String?
386
- attr_accessor dc_identifier: String?
387
- attr_accessor dc_language: String?
388
- attr_accessor dc_rights: String?
389
- attr_accessor article: ArticleMetadata?
372
+ attr_accessor twitter_card: String?
373
+ attr_accessor twitter_title: String?
374
+ attr_accessor twitter_description: String?
375
+ attr_accessor twitter_image: String?
376
+ attr_accessor twitter_site: String?
377
+ attr_accessor twitter_creator: String?
378
+ attr_accessor dc_title: String?
379
+ attr_accessor dc_creator: String?
380
+ attr_accessor dc_subject: String?
381
+ attr_accessor dc_description: String?
382
+ attr_accessor dc_publisher: String?
383
+ attr_accessor dc_date: String?
384
+ attr_accessor dc_type: String?
385
+ attr_accessor dc_format: String?
386
+ attr_accessor dc_identifier: String?
387
+ attr_accessor dc_language: String?
388
+ attr_accessor dc_rights: String?
389
+ attr_accessor article: ArticleMetadata?
390
390
  attr_accessor hreflangs: Array[HreflangEntry]?
391
391
  attr_accessor favicons: Array[FaviconInfo]?
392
392
  attr_accessor headings: Array[HeadingInfo]?
393
- attr_accessor word_count: Integer?
393
+ attr_accessor word_count: Integer?
394
394
 
395
- def initialize: (?title: String, ?description: String, ?canonical_url: String, ?keywords: String, ?author: String, ?viewport: String, ?theme_color: String, ?generator: String, ?robots: String, ?html_lang: String, ?html_dir: String, ?og_title: String, ?og_type: String, ?og_image: String, ?og_description: String, ?og_url: String, ?og_site_name: String, ?og_locale: String, ?og_video: String, ?og_audio: String, ?og_locale_alternates: Array[String], ?twitter_card: String, ?twitter_title: String, ?twitter_description: String, ?twitter_image: String, ?twitter_site: String, ?twitter_creator: String, ?dc_title: String, ?dc_creator: String, ?dc_subject: String, ?dc_description: String, ?dc_publisher: String, ?dc_date: String, ?dc_type: String, ?dc_format: String, ?dc_identifier: String, ?dc_language: String, ?dc_rights: String, ?article: ArticleMetadata, ?hreflangs: Array[HreflangEntry], ?favicons: Array[FaviconInfo], ?headings: Array[HeadingInfo], ?word_count: Integer) -> void
396
- end
395
+ def initialize: (?title: String, ?description: String, ?canonical_url: String, ?keywords: String, ?author: String, ?viewport: String, ?theme_color: String, ?generator: String, ?robots: String, ?html_lang: String, ?html_dir: String, ?og_title: String, ?og_type: String, ?og_image: String, ?og_description: String, ?og_url: String, ?og_site_name: String, ?og_locale: String, ?og_video: String, ?og_audio: String, ?og_locale_alternates: Array[String], ?twitter_card: String, ?twitter_title: String, ?twitter_description: String, ?twitter_image: String, ?twitter_site: String, ?twitter_creator: String, ?dc_title: String, ?dc_creator: String, ?dc_subject: String, ?dc_description: String, ?dc_publisher: String, ?dc_date: String, ?dc_type: String, ?dc_format: String, ?dc_identifier: String, ?dc_language: String, ?dc_rights: String, ?article: ArticleMetadata, ?hreflangs: Array[HreflangEntry], ?favicons: Array[FaviconInfo], ?headings: Array[HeadingInfo], ?word_count: Integer) -> void
396
+ end
397
397
 
398
- class CrawlStreamRequest
399
- attr_accessor url: String?
398
+ class CrawlStreamRequest
399
+ attr_accessor url: String?
400
400
 
401
401
  def initialize: (?url: String) -> void
402
- end
402
+ end
403
403
 
404
- class BatchCrawlStreamRequest
404
+ class BatchCrawlStreamRequest
405
405
  attr_accessor urls: Array[String]?
406
406
 
407
- def initialize: (?urls: Array[String]) -> void
408
- end
407
+ def initialize: (?urls: Array[String]) -> void
408
+ end
409
409
 
410
- class CitationResult
411
- attr_accessor content: String?
410
+ class CitationResult
411
+ attr_accessor content: String?
412
412
  attr_accessor references: Array[CitationReference]?
413
413
 
414
- def initialize: (?content: String, ?references: Array[CitationReference]) -> void
415
- end
414
+ def initialize: (?content: String, ?references: Array[CitationReference]) -> void
415
+ end
416
416
 
417
- class CitationReference
418
- attr_accessor index: Integer?
419
- attr_accessor url: String?
420
- attr_accessor text: String?
417
+ class CitationReference
418
+ attr_accessor index: Integer?
419
+ attr_accessor url: String?
420
+ attr_accessor text: String?
421
421
 
422
422
  def initialize: (?index: Integer, ?url: String, ?text: String) -> void
423
- end
423
+ end
424
424
 
425
- class CrawlEngineHandle
426
- def crawl_stream: (CrawlStreamRequest req) -> Enumerator[CrawlStreamIterator]
427
- def batch_crawl_stream: (BatchCrawlStreamRequest req) -> Enumerator[BatchCrawlStreamIterator]
428
- end
425
+ class CrawlEngineHandle
426
+ def crawl_stream: (CrawlStreamRequest req) -> Enumerator[CrawlStreamIterator]
427
+ def batch_crawl_stream: (BatchCrawlStreamRequest req) -> Enumerator[BatchCrawlStreamIterator]
428
+ end
429
429
 
430
- class BatchScrapeResult
431
- attr_accessor url: String?
432
- attr_accessor result: ScrapeResult?
433
- attr_accessor error: String?
430
+ class BatchScrapeResult
431
+ attr_accessor url: String?
432
+ attr_accessor result: ScrapeResult?
433
+ attr_accessor error: String?
434
434
 
435
435
  def initialize: (?url: String, ?result: ScrapeResult, ?error: String) -> void
436
- end
436
+ end
437
437
 
438
- class BatchCrawlResult
439
- attr_accessor url: String?
440
- attr_accessor result: CrawlResult?
441
- attr_accessor error: String?
438
+ class BatchCrawlResult
439
+ attr_accessor url: String?
440
+ attr_accessor result: CrawlResult?
441
+ attr_accessor error: String?
442
442
 
443
443
  def initialize: (?url: String, ?result: CrawlResult, ?error: String) -> void
444
- end
444
+ end
445
445
 
446
- class BatchScrapeResults
446
+ class BatchScrapeResults
447
447
  attr_accessor results: Array[BatchScrapeResult]?
448
- attr_accessor total_count: Integer?
449
- attr_accessor completed_count: Integer?
450
- attr_accessor failed_count: Integer?
448
+ attr_accessor total_count: Integer?
449
+ attr_accessor completed_count: Integer?
450
+ attr_accessor failed_count: Integer?
451
451
 
452
- def initialize: (?results: Array[BatchScrapeResult], ?total_count: Integer, ?completed_count: Integer, ?failed_count: Integer) -> void
453
- end
452
+ def initialize: (?results: Array[BatchScrapeResult], ?total_count: Integer, ?completed_count: Integer, ?failed_count: Integer) -> void
453
+ end
454
454
 
455
- class BatchCrawlResults
455
+ class BatchCrawlResults
456
456
  attr_accessor results: Array[BatchCrawlResult]?
457
- attr_accessor total_count: Integer?
458
- attr_accessor completed_count: Integer?
459
- attr_accessor failed_count: Integer?
457
+ attr_accessor total_count: Integer?
458
+ attr_accessor completed_count: Integer?
459
+ attr_accessor failed_count: Integer?
460
460
 
461
- def initialize: (?results: Array[BatchCrawlResult], ?total_count: Integer, ?completed_count: Integer, ?failed_count: Integer) -> void
462
- end
461
+ def initialize: (?results: Array[BatchCrawlResult], ?total_count: Integer, ?completed_count: Integer, ?failed_count: Integer) -> void
462
+ end
463
463
 
464
- class SsrfPolicy
465
- attr_accessor deny_private: bool?
466
- attr_accessor max_redirects: Integer?
464
+ class SsrfPolicy
465
+ attr_accessor deny_private: bool?
466
+ attr_accessor max_redirects: Integer?
467
467
 
468
468
  def initialize: (?deny_private: bool, ?max_redirects: Integer) -> void
469
469
  def self.default: () -> SsrfPolicy
470
470
  def self.from_env: () -> SsrfPolicy
471
- end
471
+ end
472
472
 
473
- class BrowserMode
474
- type value = :auto | :always | :never | :stealth
475
- end
473
+ class BrowserMode
474
+ type value = :auto | :always | :never | :stealth
475
+ end
476
476
 
477
- class BrowserWait
478
- type value = :network_idle | :selector | :fixed
479
- end
477
+ class BrowserWait
478
+ type value = :network_idle | :selector | :fixed
479
+ end
480
480
 
481
- class BrowserBackend
482
- type value = :chromiumoxide | :native
483
- end
481
+ class BrowserBackend
482
+ type value = :chromiumoxide | :native
483
+ end
484
484
 
485
- class AuthConfig
486
- end
485
+ class AuthConfig
486
+ end
487
487
 
488
- class LinkType
489
- type value = :internal | :external | :anchor | :document
490
- end
488
+ class LinkType
489
+ type value = :internal | :external | :anchor | :document
490
+ end
491
491
 
492
- class ImageSource
493
- type value = :img | :picture_source | :og_image | :twitter_image
494
- end
492
+ class ImageSource
493
+ type value = :img | :picture_source | :og_image | :twitter_image
494
+ end
495
495
 
496
- class FeedType
497
- type value = :rss | :atom | :json_feed
498
- end
496
+ class FeedType
497
+ type value = :rss | :atom | :json_feed
498
+ end
499
499
 
500
- class AssetCategory
501
- type value = :document | :image | :audio | :video | :font | :stylesheet | :script | :archive | :data | :other
502
- end
500
+ class AssetCategory
501
+ type value = :document | :image | :audio | :video | :font | :stylesheet | :script | :archive | :data | :other
502
+ end
503
503
 
504
- class CrawlEvent
505
- end
504
+ class CrawlEvent
505
+ end
506
506
 
507
- class PageAction
508
- end
507
+ class PageAction
508
+ end
509
509
 
510
- class ScrollDirection
511
- type value = :up | :down
512
- end
510
+ class ScrollDirection
511
+ type value = :up | :down
512
+ end
513
513
 
514
- def self.generate_citations: (String markdown) -> CitationResult
514
+ def self.generate_citations: (String markdown) -> CitationResult
515
515
 
516
- def self.create_engine: (?CrawlConfig config) -> CrawlEngineHandle
516
+ def self.create_engine: (?CrawlConfig config) -> CrawlEngineHandle
517
517
 
518
- def self.scrape: (CrawlEngineHandle engine, String url) -> ScrapeResult
518
+ def self.scrape: (CrawlEngineHandle engine, String url) -> ScrapeResult
519
519
 
520
- def self.crawl: (CrawlEngineHandle engine, String url) -> CrawlResult
520
+ def self.crawl: (CrawlEngineHandle engine, String url) -> CrawlResult
521
521
 
522
- def self.map_urls: (CrawlEngineHandle engine, String url) -> MapResult
522
+ def self.map_urls: (CrawlEngineHandle engine, String url) -> MapResult
523
523
 
524
- def self.interact: (CrawlEngineHandle engine, String url, Array[PageAction] actions) -> InteractionResult
524
+ def self.interact: (CrawlEngineHandle engine, String url, Array[PageAction] actions) -> InteractionResult
525
525
 
526
- def self.batch_scrape: (CrawlEngineHandle engine, Array[String] urls) -> BatchScrapeResults
526
+ def self.batch_scrape: (CrawlEngineHandle engine, Array[String] urls) -> BatchScrapeResults
527
527
 
528
- def self.batch_crawl: (CrawlEngineHandle engine, Array[String] urls) -> BatchCrawlResults
528
+ def self.batch_crawl: (CrawlEngineHandle engine, Array[String] urls) -> BatchCrawlResults
529
529
 
530
530
  end