@kreuzberg/kreuzcrawl 0.1.0-rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib.rs ADDED
@@ -0,0 +1,1921 @@
1
+ // This file is auto-generated by alef. DO NOT EDIT.
2
+ // Re-generate with: alef generate
3
+ #![allow(dead_code)]
4
+
5
+ use napi::*;
6
+ use napi_derive::napi;
7
+ use std::collections::HashMap;
8
+ use std::sync::Arc;
9
+
10
+ static WORKER_POOL: std::sync::LazyLock<tokio::runtime::Runtime> = std::sync::LazyLock::new(|| {
11
+ tokio::runtime::Builder::new_multi_thread()
12
+ .enable_all()
13
+ .build()
14
+ .expect("Failed to create Tokio runtime")
15
+ });
16
+
17
+ #[derive(Clone, Default)]
18
+ #[napi(object)]
19
+ pub struct JsExtractionMeta {
20
+ pub cost: Option<f64>,
21
+ #[napi(js_name = "promptTokens")]
22
+ pub prompt_tokens: Option<i64>,
23
+ #[napi(js_name = "completionTokens")]
24
+ pub completion_tokens: Option<i64>,
25
+ pub model: Option<String>,
26
+ #[napi(js_name = "chunksProcessed")]
27
+ pub chunks_processed: Option<i64>,
28
+ }
29
+
30
+ #[derive(Clone, Default)]
31
+ #[napi(object)]
32
+ pub struct JsProxyConfig {
33
+ pub url: Option<String>,
34
+ pub username: Option<String>,
35
+ pub password: Option<String>,
36
+ }
37
+
38
+ #[derive(Clone, Default)]
39
+ #[napi(object)]
40
+ pub struct JsBrowserConfig {
41
+ pub mode: Option<JsBrowserMode>,
42
+ pub endpoint: Option<String>,
43
+ pub timeout: Option<i64>,
44
+ pub wait: Option<JsBrowserWait>,
45
+ #[napi(js_name = "waitSelector")]
46
+ pub wait_selector: Option<String>,
47
+ #[napi(js_name = "extraWait")]
48
+ pub extra_wait: Option<i64>,
49
+ }
50
+
51
+ #[derive(Clone, Default)]
52
+ #[napi(object)]
53
+ pub struct JsCrawlConfig {
54
+ #[napi(js_name = "maxDepth")]
55
+ pub max_depth: Option<i64>,
56
+ #[napi(js_name = "maxPages")]
57
+ pub max_pages: Option<i64>,
58
+ #[napi(js_name = "maxConcurrent")]
59
+ pub max_concurrent: Option<i64>,
60
+ #[napi(js_name = "respectRobotsTxt")]
61
+ pub respect_robots_txt: Option<bool>,
62
+ #[napi(js_name = "userAgent")]
63
+ pub user_agent: Option<String>,
64
+ #[napi(js_name = "stayOnDomain")]
65
+ pub stay_on_domain: Option<bool>,
66
+ #[napi(js_name = "allowSubdomains")]
67
+ pub allow_subdomains: Option<bool>,
68
+ #[napi(js_name = "includePaths")]
69
+ pub include_paths: Option<Vec<String>>,
70
+ #[napi(js_name = "excludePaths")]
71
+ pub exclude_paths: Option<Vec<String>>,
72
+ #[napi(js_name = "customHeaders")]
73
+ pub custom_headers: Option<HashMap<String, String>>,
74
+ #[napi(js_name = "requestTimeout")]
75
+ pub request_timeout: Option<i64>,
76
+ #[napi(js_name = "maxRedirects")]
77
+ pub max_redirects: Option<i64>,
78
+ #[napi(js_name = "retryCount")]
79
+ pub retry_count: Option<i64>,
80
+ #[napi(js_name = "retryCodes")]
81
+ pub retry_codes: Option<Vec<u16>>,
82
+ #[napi(js_name = "cookiesEnabled")]
83
+ pub cookies_enabled: Option<bool>,
84
+ pub auth: Option<JsAuthConfig>,
85
+ #[napi(js_name = "maxBodySize")]
86
+ pub max_body_size: Option<i64>,
87
+ #[napi(js_name = "mainContentOnly")]
88
+ pub main_content_only: Option<bool>,
89
+ #[napi(js_name = "removeTags")]
90
+ pub remove_tags: Option<Vec<String>>,
91
+ #[napi(js_name = "mapLimit")]
92
+ pub map_limit: Option<i64>,
93
+ #[napi(js_name = "mapSearch")]
94
+ pub map_search: Option<String>,
95
+ #[napi(js_name = "downloadAssets")]
96
+ pub download_assets: Option<bool>,
97
+ #[napi(js_name = "assetTypes")]
98
+ pub asset_types: Option<Vec<JsAssetCategory>>,
99
+ #[napi(js_name = "maxAssetSize")]
100
+ pub max_asset_size: Option<i64>,
101
+ pub browser: Option<JsBrowserConfig>,
102
+ pub proxy: Option<JsProxyConfig>,
103
+ #[napi(js_name = "userAgents")]
104
+ pub user_agents: Option<Vec<String>>,
105
+ #[napi(js_name = "captureScreenshot")]
106
+ pub capture_screenshot: Option<bool>,
107
+ #[napi(js_name = "downloadDocuments")]
108
+ pub download_documents: Option<bool>,
109
+ #[napi(js_name = "documentMaxSize")]
110
+ pub document_max_size: Option<i64>,
111
+ #[napi(js_name = "documentMimeTypes")]
112
+ pub document_mime_types: Option<Vec<String>>,
113
+ #[napi(js_name = "warcOutput")]
114
+ pub warc_output: Option<String>,
115
+ #[napi(js_name = "browserProfile")]
116
+ pub browser_profile: Option<String>,
117
+ #[napi(js_name = "saveBrowserProfile")]
118
+ pub save_browser_profile: Option<bool>,
119
+ }
120
+
121
+ #[derive(Clone, Default)]
122
+ #[napi(object)]
123
+ pub struct JsDownloadedDocument {
124
+ pub url: Option<String>,
125
+ #[napi(js_name = "mimeType")]
126
+ pub mime_type: Option<String>,
127
+ pub content: Option<Vec<u8>>,
128
+ pub size: Option<i64>,
129
+ pub filename: Option<String>,
130
+ #[napi(js_name = "contentHash")]
131
+ pub content_hash: Option<String>,
132
+ pub headers: Option<HashMap<String, String>>,
133
+ }
134
+
135
+ #[derive(Clone, Default)]
136
+ #[napi(object)]
137
+ pub struct JsInteractionResult {
138
+ #[napi(js_name = "actionResults")]
139
+ pub action_results: Option<Vec<JsActionResult>>,
140
+ #[napi(js_name = "finalHtml")]
141
+ pub final_html: Option<String>,
142
+ #[napi(js_name = "finalUrl")]
143
+ pub final_url: Option<String>,
144
+ pub screenshot: Option<Vec<u8>>,
145
+ }
146
+
147
+ #[derive(Clone, Default)]
148
+ #[napi(object)]
149
+ pub struct JsActionResult {
150
+ #[napi(js_name = "actionIndex")]
151
+ pub action_index: Option<i64>,
152
+ #[napi(js_name = "actionType")]
153
+ pub action_type: Option<String>,
154
+ pub success: Option<bool>,
155
+ pub data: Option<String>,
156
+ pub error: Option<String>,
157
+ }
158
+
159
+ #[derive(Clone, Default)]
160
+ #[napi(object)]
161
+ pub struct JsScrapeResult {
162
+ #[napi(js_name = "statusCode")]
163
+ pub status_code: Option<u16>,
164
+ #[napi(js_name = "contentType")]
165
+ pub content_type: Option<String>,
166
+ pub html: Option<String>,
167
+ #[napi(js_name = "bodySize")]
168
+ pub body_size: Option<i64>,
169
+ pub metadata: Option<JsPageMetadata>,
170
+ pub links: Option<Vec<JsLinkInfo>>,
171
+ pub images: Option<Vec<JsImageInfo>>,
172
+ pub feeds: Option<Vec<JsFeedInfo>>,
173
+ #[napi(js_name = "jsonLd")]
174
+ pub json_ld: Option<Vec<JsJsonLdEntry>>,
175
+ #[napi(js_name = "isAllowed")]
176
+ pub is_allowed: Option<bool>,
177
+ #[napi(js_name = "crawlDelay")]
178
+ pub crawl_delay: Option<i64>,
179
+ #[napi(js_name = "noindexDetected")]
180
+ pub noindex_detected: Option<bool>,
181
+ #[napi(js_name = "nofollowDetected")]
182
+ pub nofollow_detected: Option<bool>,
183
+ #[napi(js_name = "xRobotsTag")]
184
+ pub x_robots_tag: Option<String>,
185
+ #[napi(js_name = "isPdf")]
186
+ pub is_pdf: Option<bool>,
187
+ #[napi(js_name = "wasSkipped")]
188
+ pub was_skipped: Option<bool>,
189
+ #[napi(js_name = "detectedCharset")]
190
+ pub detected_charset: Option<String>,
191
+ #[napi(js_name = "mainContentOnly")]
192
+ pub main_content_only: Option<bool>,
193
+ #[napi(js_name = "authHeaderSent")]
194
+ pub auth_header_sent: Option<bool>,
195
+ #[napi(js_name = "responseMeta")]
196
+ pub response_meta: Option<JsResponseMeta>,
197
+ pub assets: Option<Vec<JsDownloadedAsset>>,
198
+ #[napi(js_name = "jsRenderHint")]
199
+ pub js_render_hint: Option<bool>,
200
+ #[napi(js_name = "browserUsed")]
201
+ pub browser_used: Option<bool>,
202
+ pub markdown: Option<JsMarkdownResult>,
203
+ #[napi(js_name = "extractedData")]
204
+ pub extracted_data: Option<String>,
205
+ #[napi(js_name = "extractionMeta")]
206
+ pub extraction_meta: Option<JsExtractionMeta>,
207
+ pub screenshot: Option<Vec<u8>>,
208
+ #[napi(js_name = "downloadedDocument")]
209
+ pub downloaded_document: Option<JsDownloadedDocument>,
210
+ }
211
+
212
+ #[derive(Clone, Default)]
213
+ #[napi(object)]
214
+ pub struct JsCrawlPageResult {
215
+ pub url: Option<String>,
216
+ #[napi(js_name = "normalizedUrl")]
217
+ pub normalized_url: Option<String>,
218
+ #[napi(js_name = "statusCode")]
219
+ pub status_code: Option<u16>,
220
+ #[napi(js_name = "contentType")]
221
+ pub content_type: Option<String>,
222
+ pub html: Option<String>,
223
+ #[napi(js_name = "bodySize")]
224
+ pub body_size: Option<i64>,
225
+ pub metadata: Option<JsPageMetadata>,
226
+ pub links: Option<Vec<JsLinkInfo>>,
227
+ pub images: Option<Vec<JsImageInfo>>,
228
+ pub feeds: Option<Vec<JsFeedInfo>>,
229
+ #[napi(js_name = "jsonLd")]
230
+ pub json_ld: Option<Vec<JsJsonLdEntry>>,
231
+ pub depth: Option<i64>,
232
+ #[napi(js_name = "stayedOnDomain")]
233
+ pub stayed_on_domain: Option<bool>,
234
+ #[napi(js_name = "wasSkipped")]
235
+ pub was_skipped: Option<bool>,
236
+ #[napi(js_name = "isPdf")]
237
+ pub is_pdf: Option<bool>,
238
+ #[napi(js_name = "detectedCharset")]
239
+ pub detected_charset: Option<String>,
240
+ pub markdown: Option<JsMarkdownResult>,
241
+ #[napi(js_name = "extractedData")]
242
+ pub extracted_data: Option<String>,
243
+ #[napi(js_name = "extractionMeta")]
244
+ pub extraction_meta: Option<JsExtractionMeta>,
245
+ #[napi(js_name = "downloadedDocument")]
246
+ pub downloaded_document: Option<JsDownloadedDocument>,
247
+ }
248
+
249
+ #[derive(Clone, Default)]
250
+ #[napi(object)]
251
+ pub struct JsCrawlResult {
252
+ pub pages: Option<Vec<JsCrawlPageResult>>,
253
+ #[napi(js_name = "finalUrl")]
254
+ pub final_url: Option<String>,
255
+ #[napi(js_name = "redirectCount")]
256
+ pub redirect_count: Option<i64>,
257
+ #[napi(js_name = "wasSkipped")]
258
+ pub was_skipped: Option<bool>,
259
+ pub error: Option<String>,
260
+ pub cookies: Option<Vec<JsCookieInfo>>,
261
+ #[napi(js_name = "normalizedUrls")]
262
+ pub normalized_urls: Option<Vec<String>>,
263
+ }
264
+
265
+ #[derive(Clone, Default)]
266
+ #[napi(object)]
267
+ pub struct JsSitemapUrl {
268
+ pub url: Option<String>,
269
+ pub lastmod: Option<String>,
270
+ pub changefreq: Option<String>,
271
+ pub priority: Option<String>,
272
+ }
273
+
274
+ #[derive(Clone, Default)]
275
+ #[napi(object)]
276
+ pub struct JsMapResult {
277
+ pub urls: Option<Vec<JsSitemapUrl>>,
278
+ }
279
+
280
+ #[derive(Clone, Default)]
281
+ #[napi(object)]
282
+ pub struct JsMarkdownResult {
283
+ pub content: Option<String>,
284
+ #[napi(js_name = "documentStructure")]
285
+ pub document_structure: Option<String>,
286
+ pub tables: Option<Vec<String>>,
287
+ pub warnings: Option<Vec<String>>,
288
+ pub citations: Option<JsCitationResult>,
289
+ #[napi(js_name = "fitContent")]
290
+ pub fit_content: Option<String>,
291
+ }
292
+
293
+ #[derive(Clone, Default)]
294
+ #[napi(object)]
295
+ pub struct JsCachedPage {
296
+ pub url: Option<String>,
297
+ #[napi(js_name = "statusCode")]
298
+ pub status_code: Option<u16>,
299
+ #[napi(js_name = "contentType")]
300
+ pub content_type: Option<String>,
301
+ pub body: Option<String>,
302
+ pub etag: Option<String>,
303
+ #[napi(js_name = "lastModified")]
304
+ pub last_modified: Option<String>,
305
+ #[napi(js_name = "cachedAt")]
306
+ pub cached_at: Option<i64>,
307
+ }
308
+
309
+ #[derive(Clone, Default)]
310
+ #[napi(object)]
311
+ pub struct JsLinkInfo {
312
+ pub url: Option<String>,
313
+ pub text: Option<String>,
314
+ #[napi(js_name = "linkType")]
315
+ pub link_type: Option<JsLinkType>,
316
+ pub rel: Option<String>,
317
+ pub nofollow: Option<bool>,
318
+ }
319
+
320
+ #[derive(Clone, Default)]
321
+ #[napi(object)]
322
+ pub struct JsImageInfo {
323
+ pub url: Option<String>,
324
+ pub alt: Option<String>,
325
+ pub width: Option<u32>,
326
+ pub height: Option<u32>,
327
+ pub source: Option<JsImageSource>,
328
+ }
329
+
330
+ #[derive(Clone, Default)]
331
+ #[napi(object)]
332
+ pub struct JsFeedInfo {
333
+ pub url: Option<String>,
334
+ pub title: Option<String>,
335
+ #[napi(js_name = "feedType")]
336
+ pub feed_type: Option<JsFeedType>,
337
+ }
338
+
339
+ #[derive(Clone, Default)]
340
+ #[napi(object)]
341
+ pub struct JsJsonLdEntry {
342
+ #[napi(js_name = "schemaType")]
343
+ pub schema_type: Option<String>,
344
+ pub name: Option<String>,
345
+ pub raw: Option<String>,
346
+ }
347
+
348
+ #[derive(Clone, Default)]
349
+ #[napi(object)]
350
+ pub struct JsCookieInfo {
351
+ pub name: Option<String>,
352
+ pub value: Option<String>,
353
+ pub domain: Option<String>,
354
+ pub path: Option<String>,
355
+ }
356
+
357
+ #[derive(Clone, Default)]
358
+ #[napi(object)]
359
+ pub struct JsDownloadedAsset {
360
+ pub url: Option<String>,
361
+ #[napi(js_name = "contentHash")]
362
+ pub content_hash: Option<String>,
363
+ #[napi(js_name = "mimeType")]
364
+ pub mime_type: Option<String>,
365
+ pub size: Option<i64>,
366
+ #[napi(js_name = "assetCategory")]
367
+ pub asset_category: Option<JsAssetCategory>,
368
+ #[napi(js_name = "htmlTag")]
369
+ pub html_tag: Option<String>,
370
+ }
371
+
372
+ #[derive(Clone, Default)]
373
+ #[napi(object)]
374
+ pub struct JsArticleMetadata {
375
+ #[napi(js_name = "publishedTime")]
376
+ pub published_time: Option<String>,
377
+ #[napi(js_name = "modifiedTime")]
378
+ pub modified_time: Option<String>,
379
+ pub author: Option<String>,
380
+ pub section: Option<String>,
381
+ pub tags: Option<Vec<String>>,
382
+ }
383
+
384
+ #[derive(Clone, Default)]
385
+ #[napi(object)]
386
+ pub struct JsHreflangEntry {
387
+ pub lang: Option<String>,
388
+ pub url: Option<String>,
389
+ }
390
+
391
+ #[derive(Clone, Default)]
392
+ #[napi(object)]
393
+ pub struct JsFaviconInfo {
394
+ pub url: Option<String>,
395
+ pub rel: Option<String>,
396
+ pub sizes: Option<String>,
397
+ #[napi(js_name = "mimeType")]
398
+ pub mime_type: Option<String>,
399
+ }
400
+
401
+ #[derive(Clone, Default)]
402
+ #[napi(object)]
403
+ pub struct JsHeadingInfo {
404
+ pub level: Option<u8>,
405
+ pub text: Option<String>,
406
+ }
407
+
408
+ #[derive(Clone, Default)]
409
+ #[napi(object)]
410
+ pub struct JsResponseMeta {
411
+ pub etag: Option<String>,
412
+ #[napi(js_name = "lastModified")]
413
+ pub last_modified: Option<String>,
414
+ #[napi(js_name = "cacheControl")]
415
+ pub cache_control: Option<String>,
416
+ pub server: Option<String>,
417
+ #[napi(js_name = "xPoweredBy")]
418
+ pub x_powered_by: Option<String>,
419
+ #[napi(js_name = "contentLanguage")]
420
+ pub content_language: Option<String>,
421
+ #[napi(js_name = "contentEncoding")]
422
+ pub content_encoding: Option<String>,
423
+ }
424
+
425
+ #[derive(Clone, Default)]
426
+ #[napi(object)]
427
+ pub struct JsPageMetadata {
428
+ pub title: Option<String>,
429
+ pub description: Option<String>,
430
+ #[napi(js_name = "canonicalUrl")]
431
+ pub canonical_url: Option<String>,
432
+ pub keywords: Option<String>,
433
+ pub author: Option<String>,
434
+ pub viewport: Option<String>,
435
+ #[napi(js_name = "themeColor")]
436
+ pub theme_color: Option<String>,
437
+ pub generator: Option<String>,
438
+ pub robots: Option<String>,
439
+ #[napi(js_name = "htmlLang")]
440
+ pub html_lang: Option<String>,
441
+ #[napi(js_name = "htmlDir")]
442
+ pub html_dir: Option<String>,
443
+ #[napi(js_name = "ogTitle")]
444
+ pub og_title: Option<String>,
445
+ #[napi(js_name = "ogType")]
446
+ pub og_type: Option<String>,
447
+ #[napi(js_name = "ogImage")]
448
+ pub og_image: Option<String>,
449
+ #[napi(js_name = "ogDescription")]
450
+ pub og_description: Option<String>,
451
+ #[napi(js_name = "ogUrl")]
452
+ pub og_url: Option<String>,
453
+ #[napi(js_name = "ogSiteName")]
454
+ pub og_site_name: Option<String>,
455
+ #[napi(js_name = "ogLocale")]
456
+ pub og_locale: Option<String>,
457
+ #[napi(js_name = "ogVideo")]
458
+ pub og_video: Option<String>,
459
+ #[napi(js_name = "ogAudio")]
460
+ pub og_audio: Option<String>,
461
+ #[napi(js_name = "ogLocaleAlternates")]
462
+ pub og_locale_alternates: Option<Vec<String>>,
463
+ #[napi(js_name = "twitterCard")]
464
+ pub twitter_card: Option<String>,
465
+ #[napi(js_name = "twitterTitle")]
466
+ pub twitter_title: Option<String>,
467
+ #[napi(js_name = "twitterDescription")]
468
+ pub twitter_description: Option<String>,
469
+ #[napi(js_name = "twitterImage")]
470
+ pub twitter_image: Option<String>,
471
+ #[napi(js_name = "twitterSite")]
472
+ pub twitter_site: Option<String>,
473
+ #[napi(js_name = "twitterCreator")]
474
+ pub twitter_creator: Option<String>,
475
+ #[napi(js_name = "dcTitle")]
476
+ pub dc_title: Option<String>,
477
+ #[napi(js_name = "dcCreator")]
478
+ pub dc_creator: Option<String>,
479
+ #[napi(js_name = "dcSubject")]
480
+ pub dc_subject: Option<String>,
481
+ #[napi(js_name = "dcDescription")]
482
+ pub dc_description: Option<String>,
483
+ #[napi(js_name = "dcPublisher")]
484
+ pub dc_publisher: Option<String>,
485
+ #[napi(js_name = "dcDate")]
486
+ pub dc_date: Option<String>,
487
+ #[napi(js_name = "dcType")]
488
+ pub dc_type: Option<String>,
489
+ #[napi(js_name = "dcFormat")]
490
+ pub dc_format: Option<String>,
491
+ #[napi(js_name = "dcIdentifier")]
492
+ pub dc_identifier: Option<String>,
493
+ #[napi(js_name = "dcLanguage")]
494
+ pub dc_language: Option<String>,
495
+ #[napi(js_name = "dcRights")]
496
+ pub dc_rights: Option<String>,
497
+ pub article: Option<JsArticleMetadata>,
498
+ pub hreflangs: Option<Vec<JsHreflangEntry>>,
499
+ pub favicons: Option<Vec<JsFaviconInfo>>,
500
+ pub headings: Option<Vec<JsHeadingInfo>>,
501
+ #[napi(js_name = "wordCount")]
502
+ pub word_count: Option<i64>,
503
+ }
504
+
505
+ #[derive(Clone, Default)]
506
+ #[napi(object)]
507
+ pub struct JsCitationResult {
508
+ pub content: Option<String>,
509
+ pub references: Option<Vec<JsCitationReference>>,
510
+ }
511
+
512
+ #[derive(Clone, Default)]
513
+ #[napi(object)]
514
+ pub struct JsCitationReference {
515
+ pub index: Option<i64>,
516
+ pub url: Option<String>,
517
+ pub text: Option<String>,
518
+ }
519
+
520
+ #[derive(Clone)]
521
+ #[napi]
522
+ pub struct JsCrawlEngineHandle {
523
+ inner: Arc<kreuzcrawl::CrawlEngineHandle>,
524
+ }
525
+
526
+ #[napi]
527
+ impl JsCrawlEngineHandle {}
528
+
529
+ #[derive(Clone, Default)]
530
+ #[napi(object)]
531
+ pub struct JsBatchScrapeResult {
532
+ pub url: Option<String>,
533
+ pub result: Option<JsScrapeResult>,
534
+ pub error: Option<String>,
535
+ }
536
+
537
+ #[derive(Clone, Default)]
538
+ #[napi(object)]
539
+ pub struct JsBatchCrawlResult {
540
+ pub url: Option<String>,
541
+ pub result: Option<JsCrawlResult>,
542
+ pub error: Option<String>,
543
+ }
544
+
545
+ #[napi(string_enum = "snake_case")]
546
+ #[derive(Clone)]
547
+ pub enum JsBrowserMode {
548
+ Auto,
549
+ Always,
550
+ Never,
551
+ }
552
+
553
+ #[allow(clippy::derivable_impls)]
554
+ impl Default for JsBrowserMode {
555
+ fn default() -> Self {
556
+ Self::Auto
557
+ }
558
+ }
559
+
560
+ #[napi(string_enum = "snake_case")]
561
+ #[derive(Clone)]
562
+ pub enum JsBrowserWait {
563
+ NetworkIdle,
564
+ Selector,
565
+ Fixed,
566
+ }
567
+
568
+ #[allow(clippy::derivable_impls)]
569
+ impl Default for JsBrowserWait {
570
+ fn default() -> Self {
571
+ Self::NetworkIdle
572
+ }
573
+ }
574
+
575
+ #[derive(Clone)]
576
+ #[napi(object)]
577
+ pub struct JsAuthConfig {
578
+ #[napi(js_name = "type")]
579
+ pub type_tag: String,
580
+ pub username: Option<String>,
581
+ pub password: Option<String>,
582
+ pub token: Option<String>,
583
+ pub name: Option<String>,
584
+ pub value: Option<String>,
585
+ }
586
+
587
+ #[allow(clippy::derivable_impls)]
588
+ impl Default for JsAuthConfig {
589
+ fn default() -> Self {
590
+ Self {
591
+ type_tag: String::new(),
592
+ name: None,
593
+ password: None,
594
+ token: None,
595
+ username: None,
596
+ value: None,
597
+ }
598
+ }
599
+ }
600
+
601
+ #[napi(string_enum = "snake_case")]
602
+ #[derive(Clone)]
603
+ pub enum JsLinkType {
604
+ Internal,
605
+ External,
606
+ Anchor,
607
+ Document,
608
+ }
609
+
610
+ #[allow(clippy::derivable_impls)]
611
+ impl Default for JsLinkType {
612
+ fn default() -> Self {
613
+ Self::Internal
614
+ }
615
+ }
616
+
617
+ #[napi(string_enum = "snake_case")]
618
+ #[derive(Clone)]
619
+ pub enum JsImageSource {
620
+ Img,
621
+ PictureSource,
622
+ OgImage,
623
+ TwitterImage,
624
+ }
625
+
626
+ #[allow(clippy::derivable_impls)]
627
+ impl Default for JsImageSource {
628
+ fn default() -> Self {
629
+ Self::Img
630
+ }
631
+ }
632
+
633
+ #[napi(string_enum = "snake_case")]
634
+ #[derive(Clone)]
635
+ pub enum JsFeedType {
636
+ Rss,
637
+ Atom,
638
+ JsonFeed,
639
+ }
640
+
641
+ #[allow(clippy::derivable_impls)]
642
+ impl Default for JsFeedType {
643
+ fn default() -> Self {
644
+ Self::Rss
645
+ }
646
+ }
647
+
648
+ #[napi(string_enum = "snake_case")]
649
+ #[derive(Clone)]
650
+ pub enum JsAssetCategory {
651
+ Document,
652
+ Image,
653
+ Audio,
654
+ Video,
655
+ Font,
656
+ Stylesheet,
657
+ Script,
658
+ Archive,
659
+ Data,
660
+ Other,
661
+ }
662
+
663
+ #[allow(clippy::derivable_impls)]
664
+ impl Default for JsAssetCategory {
665
+ fn default() -> Self {
666
+ Self::Document
667
+ }
668
+ }
669
+
670
+ #[napi(string_enum)]
671
+ #[derive(Clone)]
672
+ pub enum JsCrawlEvent {
673
+ Page,
674
+ Error,
675
+ Complete,
676
+ }
677
+
678
+ #[allow(clippy::derivable_impls)]
679
+ impl Default for JsCrawlEvent {
680
+ fn default() -> Self {
681
+ Self::Page
682
+ }
683
+ }
684
+
685
+ #[allow(clippy::missing_errors_doc)]
686
+ #[napi(js_name = "createEngine")]
687
+ pub fn create_engine(config: Option<JsCrawlConfig>) -> Result<JsCrawlEngineHandle> {
688
+ let config_core = config.map(Into::into);
689
+ kreuzcrawl::create_engine(config_core)
690
+ .map(|val| JsCrawlEngineHandle { inner: Arc::new(val) })
691
+ .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))
692
+ }
693
+
694
+ #[allow(clippy::missing_errors_doc)]
695
+ #[napi]
696
+ pub async fn scrape(engine: &JsCrawlEngineHandle, url: String) -> Result<JsScrapeResult> {
697
+ let result = kreuzcrawl::scrape(&engine.inner, &url)
698
+ .await
699
+ .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?;
700
+ Ok(result.into())
701
+ }
702
+
703
+ #[allow(clippy::missing_errors_doc)]
704
+ #[napi]
705
+ pub async fn crawl(engine: &JsCrawlEngineHandle, url: String) -> Result<JsCrawlResult> {
706
+ let result = kreuzcrawl::crawl(&engine.inner, &url)
707
+ .await
708
+ .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?;
709
+ Ok(result.into())
710
+ }
711
+
712
+ #[allow(clippy::missing_errors_doc)]
713
+ #[napi(js_name = "mapUrls")]
714
+ pub async fn map_urls(engine: &JsCrawlEngineHandle, url: String) -> Result<JsMapResult> {
715
+ let result = kreuzcrawl::map_urls(&engine.inner, &url)
716
+ .await
717
+ .map_err(|e| napi::Error::new(napi::Status::GenericFailure, e.to_string()))?;
718
+ Ok(result.into())
719
+ }
720
+
721
+ #[napi(js_name = "batchScrape")]
722
+ pub async fn batch_scrape(engine: &JsCrawlEngineHandle, urls: Vec<String>) -> Vec<JsBatchScrapeResult> {
723
+ let result = kreuzcrawl::batch_scrape(&engine.inner, urls).await;
724
+ result.into_iter().map(Into::into).collect()
725
+ }
726
+
727
+ #[napi(js_name = "batchCrawl")]
728
+ pub async fn batch_crawl(engine: &JsCrawlEngineHandle, urls: Vec<String>) -> Vec<JsBatchCrawlResult> {
729
+ let result = kreuzcrawl::batch_crawl(&engine.inner, urls).await;
730
+ result.into_iter().map(Into::into).collect()
731
+ }
732
+
733
+ impl From<JsExtractionMeta> for kreuzcrawl::ExtractionMeta {
734
+ fn from(val: JsExtractionMeta) -> Self {
735
+ Self {
736
+ cost: val.cost,
737
+ prompt_tokens: val.prompt_tokens.map(|v| v as u64),
738
+ completion_tokens: val.completion_tokens.map(|v| v as u64),
739
+ model: val.model,
740
+ chunks_processed: val.chunks_processed.map(|v| v as usize).unwrap_or_default(),
741
+ }
742
+ }
743
+ }
744
+
745
+ impl From<kreuzcrawl::ExtractionMeta> for JsExtractionMeta {
746
+ fn from(val: kreuzcrawl::ExtractionMeta) -> Self {
747
+ Self {
748
+ cost: val.cost,
749
+ prompt_tokens: val.prompt_tokens.map(|v| v as i64),
750
+ completion_tokens: val.completion_tokens.map(|v| v as i64),
751
+ model: val.model,
752
+ chunks_processed: Some(val.chunks_processed as i64),
753
+ }
754
+ }
755
+ }
756
+
757
+ impl From<JsProxyConfig> for kreuzcrawl::ProxyConfig {
758
+ fn from(val: JsProxyConfig) -> Self {
759
+ Self {
760
+ url: val.url.unwrap_or_default(),
761
+ username: val.username,
762
+ password: val.password,
763
+ }
764
+ }
765
+ }
766
+
767
+ impl From<kreuzcrawl::ProxyConfig> for JsProxyConfig {
768
+ fn from(val: kreuzcrawl::ProxyConfig) -> Self {
769
+ Self {
770
+ url: Some(val.url),
771
+ username: val.username,
772
+ password: val.password,
773
+ }
774
+ }
775
+ }
776
+
777
+ #[allow(clippy::field_reassign_with_default)]
778
+ impl From<JsBrowserConfig> for kreuzcrawl::BrowserConfig {
779
+ fn from(val: JsBrowserConfig) -> Self {
780
+ let mut __result = kreuzcrawl::BrowserConfig::default();
781
+ __result.mode = val.mode.map(Into::into).unwrap_or_default();
782
+ __result.endpoint = val.endpoint;
783
+ if let Some(__v) = val.timeout {
784
+ __result.timeout = std::time::Duration::from_millis(__v as u64);
785
+ }
786
+ __result.wait = val.wait.map(Into::into).unwrap_or_default();
787
+ __result.wait_selector = val.wait_selector;
788
+ __result.extra_wait = val.extra_wait.map(|v| std::time::Duration::from_millis(v as u64));
789
+ __result
790
+ }
791
+ }
792
+
793
+ impl From<kreuzcrawl::BrowserConfig> for JsBrowserConfig {
794
+ fn from(val: kreuzcrawl::BrowserConfig) -> Self {
795
+ Self {
796
+ mode: Some(val.mode.into()),
797
+ endpoint: val.endpoint,
798
+ timeout: Some(val.timeout.as_millis() as u64 as i64),
799
+ wait: Some(val.wait.into()),
800
+ wait_selector: val.wait_selector,
801
+ extra_wait: val.extra_wait.map(|d| d.as_millis() as u64 as i64),
802
+ }
803
+ }
804
+ }
805
+
806
+ #[allow(clippy::needless_update)]
807
+ #[allow(clippy::field_reassign_with_default)]
808
+ impl From<JsCrawlConfig> for kreuzcrawl::CrawlConfig {
809
+ fn from(val: JsCrawlConfig) -> Self {
810
+ let mut __result = kreuzcrawl::CrawlConfig::default();
811
+ __result.max_depth = val.max_depth.map(|v| v as usize);
812
+ __result.max_pages = val.max_pages.map(|v| v as usize);
813
+ __result.max_concurrent = val.max_concurrent.map(|v| v as usize);
814
+ __result.respect_robots_txt = val.respect_robots_txt.unwrap_or_default();
815
+ __result.user_agent = val.user_agent;
816
+ __result.stay_on_domain = val.stay_on_domain.unwrap_or_default();
817
+ __result.allow_subdomains = val.allow_subdomains.unwrap_or_default();
818
+ __result.include_paths = val.include_paths.unwrap_or_default();
819
+ __result.exclude_paths = val.exclude_paths.unwrap_or_default();
820
+ __result.custom_headers = val.custom_headers.unwrap_or_default().into_iter().collect();
821
+ if let Some(__v) = val.request_timeout {
822
+ __result.request_timeout = std::time::Duration::from_millis(__v as u64);
823
+ }
824
+ __result.max_redirects = val.max_redirects.map(|v| v as usize).unwrap_or_default();
825
+ __result.retry_count = val.retry_count.map(|v| v as usize).unwrap_or_default();
826
+ __result.retry_codes = val.retry_codes.unwrap_or_default();
827
+ __result.cookies_enabled = val.cookies_enabled.unwrap_or_default();
828
+ __result.auth = val.auth.map(Into::into);
829
+ __result.max_body_size = val.max_body_size.map(|v| v as usize);
830
+ __result.main_content_only = val.main_content_only.unwrap_or_default();
831
+ __result.remove_tags = val.remove_tags.unwrap_or_default();
832
+ __result.map_limit = val.map_limit.map(|v| v as usize);
833
+ __result.map_search = val.map_search;
834
+ __result.download_assets = val.download_assets.unwrap_or_default();
835
+ __result.asset_types = val
836
+ .asset_types
837
+ .map(|v| v.into_iter().map(Into::into).collect())
838
+ .unwrap_or_default();
839
+ __result.max_asset_size = val.max_asset_size.map(|v| v as usize);
840
+ __result.browser = val.browser.map(Into::into).unwrap_or_default();
841
+ __result.proxy = val.proxy.map(Into::into);
842
+ __result.user_agents = val.user_agents.unwrap_or_default();
843
+ __result.capture_screenshot = val.capture_screenshot.unwrap_or_default();
844
+ __result.download_documents = val.download_documents.unwrap_or_default();
845
+ __result.document_max_size = val.document_max_size.map(|v| v as usize);
846
+ __result.document_mime_types = val.document_mime_types.unwrap_or_default();
847
+ __result.warc_output = val.warc_output.map(Into::into);
848
+ __result.browser_profile = val.browser_profile;
849
+ __result.save_browser_profile = val.save_browser_profile.unwrap_or_default();
850
+ __result
851
+ }
852
+ }
853
+
854
+ impl From<kreuzcrawl::CrawlConfig> for JsCrawlConfig {
855
+ fn from(val: kreuzcrawl::CrawlConfig) -> Self {
856
+ Self {
857
+ max_depth: val.max_depth.map(|v| v as i64),
858
+ max_pages: val.max_pages.map(|v| v as i64),
859
+ max_concurrent: val.max_concurrent.map(|v| v as i64),
860
+ respect_robots_txt: Some(val.respect_robots_txt),
861
+ user_agent: val.user_agent,
862
+ stay_on_domain: Some(val.stay_on_domain),
863
+ allow_subdomains: Some(val.allow_subdomains),
864
+ include_paths: Some(val.include_paths),
865
+ exclude_paths: Some(val.exclude_paths),
866
+ custom_headers: Some(val.custom_headers.into_iter().collect()),
867
+ request_timeout: Some(val.request_timeout.as_millis() as u64 as i64),
868
+ max_redirects: Some(val.max_redirects as i64),
869
+ retry_count: Some(val.retry_count as i64),
870
+ retry_codes: Some(val.retry_codes),
871
+ cookies_enabled: Some(val.cookies_enabled),
872
+ auth: val.auth.map(Into::into),
873
+ max_body_size: val.max_body_size.map(|v| v as i64),
874
+ main_content_only: Some(val.main_content_only),
875
+ remove_tags: Some(val.remove_tags),
876
+ map_limit: val.map_limit.map(|v| v as i64),
877
+ map_search: val.map_search,
878
+ download_assets: Some(val.download_assets),
879
+ asset_types: Some(val.asset_types.into_iter().map(Into::into).collect()),
880
+ max_asset_size: val.max_asset_size.map(|v| v as i64),
881
+ browser: Some(val.browser.into()),
882
+ proxy: val.proxy.map(Into::into),
883
+ user_agents: Some(val.user_agents),
884
+ capture_screenshot: Some(val.capture_screenshot),
885
+ download_documents: Some(val.download_documents),
886
+ document_max_size: val.document_max_size.map(|v| v as i64),
887
+ document_mime_types: Some(val.document_mime_types),
888
+ warc_output: val.warc_output.map(|p| p.to_string_lossy().to_string()),
889
+ browser_profile: val.browser_profile,
890
+ save_browser_profile: Some(val.save_browser_profile),
891
+ }
892
+ }
893
+ }
894
+
895
+ impl From<JsDownloadedDocument> for kreuzcrawl::DownloadedDocument {
896
+ fn from(val: JsDownloadedDocument) -> Self {
897
+ Self {
898
+ url: val.url.unwrap_or_default(),
899
+ mime_type: Default::default(),
900
+ content: val.content.unwrap_or_default(),
901
+ size: val.size.map(|v| v as usize).unwrap_or_default(),
902
+ filename: Default::default(),
903
+ content_hash: Default::default(),
904
+ headers: Default::default(),
905
+ }
906
+ }
907
+ }
908
+
909
+ impl From<kreuzcrawl::DownloadedDocument> for JsDownloadedDocument {
910
+ fn from(val: kreuzcrawl::DownloadedDocument) -> Self {
911
+ Self {
912
+ url: Some(val.url),
913
+ mime_type: Some(format!("{:?}", val.mime_type)),
914
+ content: Some(val.content.to_vec()),
915
+ size: Some(val.size as i64),
916
+ filename: val.filename.as_ref().map(|v| format!("{:?}", v)),
917
+ content_hash: Some(format!("{:?}", val.content_hash)),
918
+ headers: Some(
919
+ val.headers
920
+ .into_iter()
921
+ .map(|(k, v)| (format!("{:?}", k), format!("{:?}", v)))
922
+ .collect(),
923
+ ),
924
+ }
925
+ }
926
+ }
927
+
928
+ impl From<kreuzcrawl::InteractionResult> for JsInteractionResult {
929
+ fn from(val: kreuzcrawl::InteractionResult) -> Self {
930
+ Self {
931
+ action_results: Some(val.action_results.into_iter().map(Into::into).collect()),
932
+ final_html: Some(val.final_html),
933
+ final_url: Some(val.final_url),
934
+ screenshot: val.screenshot.map(|v| v.to_vec()),
935
+ }
936
+ }
937
+ }
938
+
939
+ impl From<kreuzcrawl::ActionResult> for JsActionResult {
940
+ fn from(val: kreuzcrawl::ActionResult) -> Self {
941
+ Self {
942
+ action_index: Some(val.action_index as i64),
943
+ action_type: Some(format!("{:?}", val.action_type)),
944
+ success: Some(val.success),
945
+ data: val.data.as_ref().map(ToString::to_string),
946
+ error: val.error,
947
+ }
948
+ }
949
+ }
950
+
951
+ impl From<JsScrapeResult> for kreuzcrawl::ScrapeResult {
952
+ fn from(val: JsScrapeResult) -> Self {
953
+ Self {
954
+ status_code: val.status_code.unwrap_or_default(),
955
+ content_type: val.content_type.unwrap_or_default(),
956
+ html: val.html.unwrap_or_default(),
957
+ body_size: val.body_size.map(|v| v as usize).unwrap_or_default(),
958
+ metadata: val.metadata.map(Into::into).unwrap_or_default(),
959
+ links: val
960
+ .links
961
+ .map(|v| v.into_iter().map(Into::into).collect())
962
+ .unwrap_or_default(),
963
+ images: val
964
+ .images
965
+ .map(|v| v.into_iter().map(Into::into).collect())
966
+ .unwrap_or_default(),
967
+ feeds: val
968
+ .feeds
969
+ .map(|v| v.into_iter().map(Into::into).collect())
970
+ .unwrap_or_default(),
971
+ json_ld: val
972
+ .json_ld
973
+ .map(|v| v.into_iter().map(Into::into).collect())
974
+ .unwrap_or_default(),
975
+ is_allowed: val.is_allowed.unwrap_or_default(),
976
+ crawl_delay: val.crawl_delay.map(|v| v as u64),
977
+ noindex_detected: val.noindex_detected.unwrap_or_default(),
978
+ nofollow_detected: val.nofollow_detected.unwrap_or_default(),
979
+ x_robots_tag: val.x_robots_tag,
980
+ is_pdf: val.is_pdf.unwrap_or_default(),
981
+ was_skipped: val.was_skipped.unwrap_or_default(),
982
+ detected_charset: val.detected_charset,
983
+ main_content_only: val.main_content_only.unwrap_or_default(),
984
+ auth_header_sent: val.auth_header_sent.unwrap_or_default(),
985
+ response_meta: val.response_meta.map(Into::into),
986
+ assets: val
987
+ .assets
988
+ .map(|v| v.into_iter().map(Into::into).collect())
989
+ .unwrap_or_default(),
990
+ js_render_hint: val.js_render_hint.unwrap_or_default(),
991
+ browser_used: val.browser_used.unwrap_or_default(),
992
+ markdown: val.markdown.map(Into::into),
993
+ extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
994
+ extraction_meta: val.extraction_meta.map(Into::into),
995
+ screenshot: val.screenshot,
996
+ downloaded_document: val.downloaded_document.map(Into::into),
997
+ }
998
+ }
999
+ }
1000
+
1001
+ impl From<kreuzcrawl::ScrapeResult> for JsScrapeResult {
1002
+ fn from(val: kreuzcrawl::ScrapeResult) -> Self {
1003
+ Self {
1004
+ status_code: Some(val.status_code),
1005
+ content_type: Some(val.content_type),
1006
+ html: Some(val.html),
1007
+ body_size: Some(val.body_size as i64),
1008
+ metadata: Some(val.metadata.into()),
1009
+ links: Some(val.links.into_iter().map(Into::into).collect()),
1010
+ images: Some(val.images.into_iter().map(Into::into).collect()),
1011
+ feeds: Some(val.feeds.into_iter().map(Into::into).collect()),
1012
+ json_ld: Some(val.json_ld.into_iter().map(Into::into).collect()),
1013
+ is_allowed: Some(val.is_allowed),
1014
+ crawl_delay: val.crawl_delay.map(|v| v as i64),
1015
+ noindex_detected: Some(val.noindex_detected),
1016
+ nofollow_detected: Some(val.nofollow_detected),
1017
+ x_robots_tag: val.x_robots_tag,
1018
+ is_pdf: Some(val.is_pdf),
1019
+ was_skipped: Some(val.was_skipped),
1020
+ detected_charset: val.detected_charset,
1021
+ main_content_only: Some(val.main_content_only),
1022
+ auth_header_sent: Some(val.auth_header_sent),
1023
+ response_meta: val.response_meta.map(Into::into),
1024
+ assets: Some(val.assets.into_iter().map(Into::into).collect()),
1025
+ js_render_hint: Some(val.js_render_hint),
1026
+ browser_used: Some(val.browser_used),
1027
+ markdown: val.markdown.map(Into::into),
1028
+ extracted_data: val.extracted_data.as_ref().map(ToString::to_string),
1029
+ extraction_meta: val.extraction_meta.map(Into::into),
1030
+ screenshot: val.screenshot.map(|v| v.to_vec()),
1031
+ downloaded_document: val.downloaded_document.map(Into::into),
1032
+ }
1033
+ }
1034
+ }
1035
+
1036
+ impl From<JsCrawlPageResult> for kreuzcrawl::CrawlPageResult {
1037
+ fn from(val: JsCrawlPageResult) -> Self {
1038
+ Self {
1039
+ url: val.url.unwrap_or_default(),
1040
+ normalized_url: val.normalized_url.unwrap_or_default(),
1041
+ status_code: val.status_code.unwrap_or_default(),
1042
+ content_type: val.content_type.unwrap_or_default(),
1043
+ html: val.html.unwrap_or_default(),
1044
+ body_size: val.body_size.map(|v| v as usize).unwrap_or_default(),
1045
+ metadata: val.metadata.map(Into::into).unwrap_or_default(),
1046
+ links: val
1047
+ .links
1048
+ .map(|v| v.into_iter().map(Into::into).collect())
1049
+ .unwrap_or_default(),
1050
+ images: val
1051
+ .images
1052
+ .map(|v| v.into_iter().map(Into::into).collect())
1053
+ .unwrap_or_default(),
1054
+ feeds: val
1055
+ .feeds
1056
+ .map(|v| v.into_iter().map(Into::into).collect())
1057
+ .unwrap_or_default(),
1058
+ json_ld: val
1059
+ .json_ld
1060
+ .map(|v| v.into_iter().map(Into::into).collect())
1061
+ .unwrap_or_default(),
1062
+ depth: val.depth.map(|v| v as usize).unwrap_or_default(),
1063
+ stayed_on_domain: val.stayed_on_domain.unwrap_or_default(),
1064
+ was_skipped: val.was_skipped.unwrap_or_default(),
1065
+ is_pdf: val.is_pdf.unwrap_or_default(),
1066
+ detected_charset: val.detected_charset,
1067
+ markdown: val.markdown.map(Into::into),
1068
+ extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
1069
+ extraction_meta: val.extraction_meta.map(Into::into),
1070
+ downloaded_document: val.downloaded_document.map(Into::into),
1071
+ }
1072
+ }
1073
+ }
1074
+
1075
+ impl From<kreuzcrawl::CrawlPageResult> for JsCrawlPageResult {
1076
+ fn from(val: kreuzcrawl::CrawlPageResult) -> Self {
1077
+ Self {
1078
+ url: Some(val.url),
1079
+ normalized_url: Some(val.normalized_url),
1080
+ status_code: Some(val.status_code),
1081
+ content_type: Some(val.content_type),
1082
+ html: Some(val.html),
1083
+ body_size: Some(val.body_size as i64),
1084
+ metadata: Some(val.metadata.into()),
1085
+ links: Some(val.links.into_iter().map(Into::into).collect()),
1086
+ images: Some(val.images.into_iter().map(Into::into).collect()),
1087
+ feeds: Some(val.feeds.into_iter().map(Into::into).collect()),
1088
+ json_ld: Some(val.json_ld.into_iter().map(Into::into).collect()),
1089
+ depth: Some(val.depth as i64),
1090
+ stayed_on_domain: Some(val.stayed_on_domain),
1091
+ was_skipped: Some(val.was_skipped),
1092
+ is_pdf: Some(val.is_pdf),
1093
+ detected_charset: val.detected_charset,
1094
+ markdown: val.markdown.map(Into::into),
1095
+ extracted_data: val.extracted_data.as_ref().map(ToString::to_string),
1096
+ extraction_meta: val.extraction_meta.map(Into::into),
1097
+ downloaded_document: val.downloaded_document.map(Into::into),
1098
+ }
1099
+ }
1100
+ }
1101
+
1102
+ impl From<JsCrawlResult> for kreuzcrawl::CrawlResult {
1103
+ fn from(val: JsCrawlResult) -> Self {
1104
+ Self {
1105
+ pages: val
1106
+ .pages
1107
+ .map(|v| v.into_iter().map(Into::into).collect())
1108
+ .unwrap_or_default(),
1109
+ final_url: val.final_url.unwrap_or_default(),
1110
+ redirect_count: val.redirect_count.map(|v| v as usize).unwrap_or_default(),
1111
+ was_skipped: val.was_skipped.unwrap_or_default(),
1112
+ error: val.error,
1113
+ cookies: val
1114
+ .cookies
1115
+ .map(|v| v.into_iter().map(Into::into).collect())
1116
+ .unwrap_or_default(),
1117
+ normalized_urls: val.normalized_urls.unwrap_or_default(),
1118
+ }
1119
+ }
1120
+ }
1121
+
1122
+ impl From<kreuzcrawl::CrawlResult> for JsCrawlResult {
1123
+ fn from(val: kreuzcrawl::CrawlResult) -> Self {
1124
+ Self {
1125
+ pages: Some(val.pages.into_iter().map(Into::into).collect()),
1126
+ final_url: Some(val.final_url),
1127
+ redirect_count: Some(val.redirect_count as i64),
1128
+ was_skipped: Some(val.was_skipped),
1129
+ error: val.error,
1130
+ cookies: Some(val.cookies.into_iter().map(Into::into).collect()),
1131
+ normalized_urls: Some(val.normalized_urls),
1132
+ }
1133
+ }
1134
+ }
1135
+
1136
+ impl From<JsSitemapUrl> for kreuzcrawl::SitemapUrl {
1137
+ fn from(val: JsSitemapUrl) -> Self {
1138
+ Self {
1139
+ url: val.url.unwrap_or_default(),
1140
+ lastmod: val.lastmod,
1141
+ changefreq: val.changefreq,
1142
+ priority: val.priority,
1143
+ }
1144
+ }
1145
+ }
1146
+
1147
+ impl From<kreuzcrawl::SitemapUrl> for JsSitemapUrl {
1148
+ fn from(val: kreuzcrawl::SitemapUrl) -> Self {
1149
+ Self {
1150
+ url: Some(val.url),
1151
+ lastmod: val.lastmod,
1152
+ changefreq: val.changefreq,
1153
+ priority: val.priority,
1154
+ }
1155
+ }
1156
+ }
1157
+
1158
+ impl From<JsMapResult> for kreuzcrawl::MapResult {
1159
+ fn from(val: JsMapResult) -> Self {
1160
+ Self {
1161
+ urls: val
1162
+ .urls
1163
+ .map(|v| v.into_iter().map(Into::into).collect())
1164
+ .unwrap_or_default(),
1165
+ }
1166
+ }
1167
+ }
1168
+
1169
+ impl From<kreuzcrawl::MapResult> for JsMapResult {
1170
+ fn from(val: kreuzcrawl::MapResult) -> Self {
1171
+ Self {
1172
+ urls: Some(val.urls.into_iter().map(Into::into).collect()),
1173
+ }
1174
+ }
1175
+ }
1176
+
1177
+ impl From<JsMarkdownResult> for kreuzcrawl::MarkdownResult {
1178
+ fn from(val: JsMarkdownResult) -> Self {
1179
+ Self {
1180
+ content: val.content.unwrap_or_default(),
1181
+ document_structure: val
1182
+ .document_structure
1183
+ .as_ref()
1184
+ .and_then(|s| serde_json::from_str(s).ok()),
1185
+ tables: val
1186
+ .tables
1187
+ .map(|v| v.into_iter().filter_map(|s| serde_json::from_str(&s).ok()).collect())
1188
+ .unwrap_or_default(),
1189
+ warnings: val.warnings.unwrap_or_default(),
1190
+ citations: val.citations.map(Into::into),
1191
+ fit_content: val.fit_content,
1192
+ }
1193
+ }
1194
+ }
1195
+
1196
+ impl From<kreuzcrawl::MarkdownResult> for JsMarkdownResult {
1197
+ fn from(val: kreuzcrawl::MarkdownResult) -> Self {
1198
+ Self {
1199
+ content: Some(val.content),
1200
+ document_structure: val.document_structure.as_ref().map(ToString::to_string),
1201
+ tables: Some(val.tables.iter().map(ToString::to_string).collect()),
1202
+ warnings: Some(val.warnings),
1203
+ citations: val.citations.map(Into::into),
1204
+ fit_content: val.fit_content,
1205
+ }
1206
+ }
1207
+ }
1208
+
1209
+ impl From<kreuzcrawl::CachedPage> for JsCachedPage {
1210
+ fn from(val: kreuzcrawl::CachedPage) -> Self {
1211
+ Self {
1212
+ url: Some(val.url),
1213
+ status_code: Some(val.status_code),
1214
+ content_type: Some(val.content_type),
1215
+ body: Some(val.body),
1216
+ etag: val.etag,
1217
+ last_modified: val.last_modified,
1218
+ cached_at: Some(val.cached_at as i64),
1219
+ }
1220
+ }
1221
+ }
1222
+
1223
+ impl From<JsLinkInfo> for kreuzcrawl::LinkInfo {
1224
+ fn from(val: JsLinkInfo) -> Self {
1225
+ Self {
1226
+ url: val.url.unwrap_or_default(),
1227
+ text: val.text.unwrap_or_default(),
1228
+ link_type: val.link_type.map(Into::into).unwrap_or_default(),
1229
+ rel: val.rel,
1230
+ nofollow: val.nofollow.unwrap_or_default(),
1231
+ }
1232
+ }
1233
+ }
1234
+
1235
+ impl From<kreuzcrawl::LinkInfo> for JsLinkInfo {
1236
+ fn from(val: kreuzcrawl::LinkInfo) -> Self {
1237
+ Self {
1238
+ url: Some(val.url),
1239
+ text: Some(val.text),
1240
+ link_type: Some(val.link_type.into()),
1241
+ rel: val.rel,
1242
+ nofollow: Some(val.nofollow),
1243
+ }
1244
+ }
1245
+ }
1246
+
1247
+ impl From<JsImageInfo> for kreuzcrawl::ImageInfo {
1248
+ fn from(val: JsImageInfo) -> Self {
1249
+ Self {
1250
+ url: val.url.unwrap_or_default(),
1251
+ alt: val.alt,
1252
+ width: val.width,
1253
+ height: val.height,
1254
+ source: val.source.map(Into::into).unwrap_or_default(),
1255
+ }
1256
+ }
1257
+ }
1258
+
1259
+ impl From<kreuzcrawl::ImageInfo> for JsImageInfo {
1260
+ fn from(val: kreuzcrawl::ImageInfo) -> Self {
1261
+ Self {
1262
+ url: Some(val.url),
1263
+ alt: val.alt,
1264
+ width: val.width,
1265
+ height: val.height,
1266
+ source: Some(val.source.into()),
1267
+ }
1268
+ }
1269
+ }
1270
+
1271
+ impl From<JsFeedInfo> for kreuzcrawl::FeedInfo {
1272
+ fn from(val: JsFeedInfo) -> Self {
1273
+ Self {
1274
+ url: val.url.unwrap_or_default(),
1275
+ title: val.title,
1276
+ feed_type: val.feed_type.map(Into::into).unwrap_or_default(),
1277
+ }
1278
+ }
1279
+ }
1280
+
1281
+ impl From<kreuzcrawl::FeedInfo> for JsFeedInfo {
1282
+ fn from(val: kreuzcrawl::FeedInfo) -> Self {
1283
+ Self {
1284
+ url: Some(val.url),
1285
+ title: val.title,
1286
+ feed_type: Some(val.feed_type.into()),
1287
+ }
1288
+ }
1289
+ }
1290
+
1291
+ impl From<JsJsonLdEntry> for kreuzcrawl::JsonLdEntry {
1292
+ fn from(val: JsJsonLdEntry) -> Self {
1293
+ Self {
1294
+ schema_type: val.schema_type.unwrap_or_default(),
1295
+ name: val.name,
1296
+ raw: val.raw.unwrap_or_default(),
1297
+ }
1298
+ }
1299
+ }
1300
+
1301
+ impl From<kreuzcrawl::JsonLdEntry> for JsJsonLdEntry {
1302
+ fn from(val: kreuzcrawl::JsonLdEntry) -> Self {
1303
+ Self {
1304
+ schema_type: Some(val.schema_type),
1305
+ name: val.name,
1306
+ raw: Some(val.raw),
1307
+ }
1308
+ }
1309
+ }
1310
+
1311
+ impl From<JsCookieInfo> for kreuzcrawl::CookieInfo {
1312
+ fn from(val: JsCookieInfo) -> Self {
1313
+ Self {
1314
+ name: val.name.unwrap_or_default(),
1315
+ value: val.value.unwrap_or_default(),
1316
+ domain: val.domain,
1317
+ path: val.path,
1318
+ }
1319
+ }
1320
+ }
1321
+
1322
+ impl From<kreuzcrawl::CookieInfo> for JsCookieInfo {
1323
+ fn from(val: kreuzcrawl::CookieInfo) -> Self {
1324
+ Self {
1325
+ name: Some(val.name),
1326
+ value: Some(val.value),
1327
+ domain: val.domain,
1328
+ path: val.path,
1329
+ }
1330
+ }
1331
+ }
1332
+
1333
+ impl From<JsDownloadedAsset> for kreuzcrawl::DownloadedAsset {
1334
+ fn from(val: JsDownloadedAsset) -> Self {
1335
+ Self {
1336
+ url: val.url.unwrap_or_default(),
1337
+ content_hash: val.content_hash.unwrap_or_default(),
1338
+ mime_type: val.mime_type,
1339
+ size: val.size.map(|v| v as usize).unwrap_or_default(),
1340
+ asset_category: val.asset_category.map(Into::into).unwrap_or_default(),
1341
+ html_tag: val.html_tag,
1342
+ }
1343
+ }
1344
+ }
1345
+
1346
+ impl From<kreuzcrawl::DownloadedAsset> for JsDownloadedAsset {
1347
+ fn from(val: kreuzcrawl::DownloadedAsset) -> Self {
1348
+ Self {
1349
+ url: Some(val.url),
1350
+ content_hash: Some(val.content_hash),
1351
+ mime_type: val.mime_type,
1352
+ size: Some(val.size as i64),
1353
+ asset_category: Some(val.asset_category.into()),
1354
+ html_tag: val.html_tag,
1355
+ }
1356
+ }
1357
+ }
1358
+
1359
+ impl From<JsArticleMetadata> for kreuzcrawl::ArticleMetadata {
1360
+ fn from(val: JsArticleMetadata) -> Self {
1361
+ Self {
1362
+ published_time: val.published_time,
1363
+ modified_time: val.modified_time,
1364
+ author: val.author,
1365
+ section: val.section,
1366
+ tags: val.tags.unwrap_or_default(),
1367
+ }
1368
+ }
1369
+ }
1370
+
1371
+ impl From<kreuzcrawl::ArticleMetadata> for JsArticleMetadata {
1372
+ fn from(val: kreuzcrawl::ArticleMetadata) -> Self {
1373
+ Self {
1374
+ published_time: val.published_time,
1375
+ modified_time: val.modified_time,
1376
+ author: val.author,
1377
+ section: val.section,
1378
+ tags: Some(val.tags),
1379
+ }
1380
+ }
1381
+ }
1382
+
1383
+ impl From<JsHreflangEntry> for kreuzcrawl::HreflangEntry {
1384
+ fn from(val: JsHreflangEntry) -> Self {
1385
+ Self {
1386
+ lang: val.lang.unwrap_or_default(),
1387
+ url: val.url.unwrap_or_default(),
1388
+ }
1389
+ }
1390
+ }
1391
+
1392
+ impl From<kreuzcrawl::HreflangEntry> for JsHreflangEntry {
1393
+ fn from(val: kreuzcrawl::HreflangEntry) -> Self {
1394
+ Self {
1395
+ lang: Some(val.lang),
1396
+ url: Some(val.url),
1397
+ }
1398
+ }
1399
+ }
1400
+
1401
+ impl From<JsFaviconInfo> for kreuzcrawl::FaviconInfo {
1402
+ fn from(val: JsFaviconInfo) -> Self {
1403
+ Self {
1404
+ url: val.url.unwrap_or_default(),
1405
+ rel: val.rel.unwrap_or_default(),
1406
+ sizes: val.sizes,
1407
+ mime_type: val.mime_type,
1408
+ }
1409
+ }
1410
+ }
1411
+
1412
+ impl From<kreuzcrawl::FaviconInfo> for JsFaviconInfo {
1413
+ fn from(val: kreuzcrawl::FaviconInfo) -> Self {
1414
+ Self {
1415
+ url: Some(val.url),
1416
+ rel: Some(val.rel),
1417
+ sizes: val.sizes,
1418
+ mime_type: val.mime_type,
1419
+ }
1420
+ }
1421
+ }
1422
+
1423
+ impl From<JsHeadingInfo> for kreuzcrawl::HeadingInfo {
1424
+ fn from(val: JsHeadingInfo) -> Self {
1425
+ Self {
1426
+ level: val.level.unwrap_or_default(),
1427
+ text: val.text.unwrap_or_default(),
1428
+ }
1429
+ }
1430
+ }
1431
+
1432
+ impl From<kreuzcrawl::HeadingInfo> for JsHeadingInfo {
1433
+ fn from(val: kreuzcrawl::HeadingInfo) -> Self {
1434
+ Self {
1435
+ level: Some(val.level),
1436
+ text: Some(val.text),
1437
+ }
1438
+ }
1439
+ }
1440
+
1441
+ impl From<JsResponseMeta> for kreuzcrawl::ResponseMeta {
1442
+ fn from(val: JsResponseMeta) -> Self {
1443
+ Self {
1444
+ etag: val.etag,
1445
+ last_modified: val.last_modified,
1446
+ cache_control: val.cache_control,
1447
+ server: val.server,
1448
+ x_powered_by: val.x_powered_by,
1449
+ content_language: val.content_language,
1450
+ content_encoding: val.content_encoding,
1451
+ }
1452
+ }
1453
+ }
1454
+
1455
+ impl From<kreuzcrawl::ResponseMeta> for JsResponseMeta {
1456
+ fn from(val: kreuzcrawl::ResponseMeta) -> Self {
1457
+ Self {
1458
+ etag: val.etag,
1459
+ last_modified: val.last_modified,
1460
+ cache_control: val.cache_control,
1461
+ server: val.server,
1462
+ x_powered_by: val.x_powered_by,
1463
+ content_language: val.content_language,
1464
+ content_encoding: val.content_encoding,
1465
+ }
1466
+ }
1467
+ }
1468
+
1469
+ impl From<JsPageMetadata> for kreuzcrawl::PageMetadata {
1470
+ fn from(val: JsPageMetadata) -> Self {
1471
+ Self {
1472
+ title: val.title,
1473
+ description: val.description,
1474
+ canonical_url: val.canonical_url,
1475
+ keywords: val.keywords,
1476
+ author: val.author,
1477
+ viewport: val.viewport,
1478
+ theme_color: val.theme_color,
1479
+ generator: val.generator,
1480
+ robots: val.robots,
1481
+ html_lang: val.html_lang,
1482
+ html_dir: val.html_dir,
1483
+ og_title: val.og_title,
1484
+ og_type: val.og_type,
1485
+ og_image: val.og_image,
1486
+ og_description: val.og_description,
1487
+ og_url: val.og_url,
1488
+ og_site_name: val.og_site_name,
1489
+ og_locale: val.og_locale,
1490
+ og_video: val.og_video,
1491
+ og_audio: val.og_audio,
1492
+ og_locale_alternates: val.og_locale_alternates,
1493
+ twitter_card: val.twitter_card,
1494
+ twitter_title: val.twitter_title,
1495
+ twitter_description: val.twitter_description,
1496
+ twitter_image: val.twitter_image,
1497
+ twitter_site: val.twitter_site,
1498
+ twitter_creator: val.twitter_creator,
1499
+ dc_title: val.dc_title,
1500
+ dc_creator: val.dc_creator,
1501
+ dc_subject: val.dc_subject,
1502
+ dc_description: val.dc_description,
1503
+ dc_publisher: val.dc_publisher,
1504
+ dc_date: val.dc_date,
1505
+ dc_type: val.dc_type,
1506
+ dc_format: val.dc_format,
1507
+ dc_identifier: val.dc_identifier,
1508
+ dc_language: val.dc_language,
1509
+ dc_rights: val.dc_rights,
1510
+ article: val.article.map(Into::into),
1511
+ hreflangs: val.hreflangs.map(|v| v.into_iter().map(Into::into).collect()),
1512
+ favicons: val.favicons.map(|v| v.into_iter().map(Into::into).collect()),
1513
+ headings: val.headings.map(|v| v.into_iter().map(Into::into).collect()),
1514
+ word_count: val.word_count.map(|v| v as usize),
1515
+ }
1516
+ }
1517
+ }
1518
+
1519
+ impl From<kreuzcrawl::PageMetadata> for JsPageMetadata {
1520
+ fn from(val: kreuzcrawl::PageMetadata) -> Self {
1521
+ Self {
1522
+ title: val.title,
1523
+ description: val.description,
1524
+ canonical_url: val.canonical_url,
1525
+ keywords: val.keywords,
1526
+ author: val.author,
1527
+ viewport: val.viewport,
1528
+ theme_color: val.theme_color,
1529
+ generator: val.generator,
1530
+ robots: val.robots,
1531
+ html_lang: val.html_lang,
1532
+ html_dir: val.html_dir,
1533
+ og_title: val.og_title,
1534
+ og_type: val.og_type,
1535
+ og_image: val.og_image,
1536
+ og_description: val.og_description,
1537
+ og_url: val.og_url,
1538
+ og_site_name: val.og_site_name,
1539
+ og_locale: val.og_locale,
1540
+ og_video: val.og_video,
1541
+ og_audio: val.og_audio,
1542
+ og_locale_alternates: val.og_locale_alternates,
1543
+ twitter_card: val.twitter_card,
1544
+ twitter_title: val.twitter_title,
1545
+ twitter_description: val.twitter_description,
1546
+ twitter_image: val.twitter_image,
1547
+ twitter_site: val.twitter_site,
1548
+ twitter_creator: val.twitter_creator,
1549
+ dc_title: val.dc_title,
1550
+ dc_creator: val.dc_creator,
1551
+ dc_subject: val.dc_subject,
1552
+ dc_description: val.dc_description,
1553
+ dc_publisher: val.dc_publisher,
1554
+ dc_date: val.dc_date,
1555
+ dc_type: val.dc_type,
1556
+ dc_format: val.dc_format,
1557
+ dc_identifier: val.dc_identifier,
1558
+ dc_language: val.dc_language,
1559
+ dc_rights: val.dc_rights,
1560
+ article: val.article.map(Into::into),
1561
+ hreflangs: val.hreflangs.map(|v| v.into_iter().map(Into::into).collect()),
1562
+ favicons: val.favicons.map(|v| v.into_iter().map(Into::into).collect()),
1563
+ headings: val.headings.map(|v| v.into_iter().map(Into::into).collect()),
1564
+ word_count: val.word_count.map(|v| v as i64),
1565
+ }
1566
+ }
1567
+ }
1568
+
1569
+ impl From<JsCitationResult> for kreuzcrawl::CitationResult {
1570
+ fn from(val: JsCitationResult) -> Self {
1571
+ Self {
1572
+ content: val.content.unwrap_or_default(),
1573
+ references: val
1574
+ .references
1575
+ .map(|v| v.into_iter().map(Into::into).collect())
1576
+ .unwrap_or_default(),
1577
+ }
1578
+ }
1579
+ }
1580
+
1581
+ impl From<kreuzcrawl::CitationResult> for JsCitationResult {
1582
+ fn from(val: kreuzcrawl::CitationResult) -> Self {
1583
+ Self {
1584
+ content: Some(val.content),
1585
+ references: Some(val.references.into_iter().map(Into::into).collect()),
1586
+ }
1587
+ }
1588
+ }
1589
+
1590
+ impl From<JsCitationReference> for kreuzcrawl::CitationReference {
1591
+ fn from(val: JsCitationReference) -> Self {
1592
+ Self {
1593
+ index: val.index.map(|v| v as usize).unwrap_or_default(),
1594
+ url: val.url.unwrap_or_default(),
1595
+ text: val.text.unwrap_or_default(),
1596
+ }
1597
+ }
1598
+ }
1599
+
1600
+ impl From<kreuzcrawl::CitationReference> for JsCitationReference {
1601
+ fn from(val: kreuzcrawl::CitationReference) -> Self {
1602
+ Self {
1603
+ index: Some(val.index as i64),
1604
+ url: Some(val.url),
1605
+ text: Some(val.text),
1606
+ }
1607
+ }
1608
+ }
1609
+
1610
+ impl From<JsBatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
1611
+ fn from(val: JsBatchScrapeResult) -> Self {
1612
+ Self {
1613
+ url: val.url.unwrap_or_default(),
1614
+ result: val.result.map(Into::into),
1615
+ error: val.error,
1616
+ }
1617
+ }
1618
+ }
1619
+
1620
+ impl From<kreuzcrawl::BatchScrapeResult> for JsBatchScrapeResult {
1621
+ fn from(val: kreuzcrawl::BatchScrapeResult) -> Self {
1622
+ Self {
1623
+ url: Some(val.url),
1624
+ result: val.result.map(Into::into),
1625
+ error: val.error,
1626
+ }
1627
+ }
1628
+ }
1629
+
1630
+ impl From<JsBatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
1631
+ fn from(val: JsBatchCrawlResult) -> Self {
1632
+ Self {
1633
+ url: val.url.unwrap_or_default(),
1634
+ result: val.result.map(Into::into),
1635
+ error: val.error,
1636
+ }
1637
+ }
1638
+ }
1639
+
1640
+ impl From<kreuzcrawl::BatchCrawlResult> for JsBatchCrawlResult {
1641
+ fn from(val: kreuzcrawl::BatchCrawlResult) -> Self {
1642
+ Self {
1643
+ url: Some(val.url),
1644
+ result: val.result.map(Into::into),
1645
+ error: val.error,
1646
+ }
1647
+ }
1648
+ }
1649
+
1650
+ impl From<JsBrowserMode> for kreuzcrawl::BrowserMode {
1651
+ fn from(val: JsBrowserMode) -> Self {
1652
+ match val {
1653
+ JsBrowserMode::Auto => Self::Auto,
1654
+ JsBrowserMode::Always => Self::Always,
1655
+ JsBrowserMode::Never => Self::Never,
1656
+ }
1657
+ }
1658
+ }
1659
+
1660
+ impl From<kreuzcrawl::BrowserMode> for JsBrowserMode {
1661
+ fn from(val: kreuzcrawl::BrowserMode) -> Self {
1662
+ match val {
1663
+ kreuzcrawl::BrowserMode::Auto => Self::Auto,
1664
+ kreuzcrawl::BrowserMode::Always => Self::Always,
1665
+ kreuzcrawl::BrowserMode::Never => Self::Never,
1666
+ }
1667
+ }
1668
+ }
1669
+
1670
+ impl From<JsBrowserWait> for kreuzcrawl::BrowserWait {
1671
+ fn from(val: JsBrowserWait) -> Self {
1672
+ match val {
1673
+ JsBrowserWait::NetworkIdle => Self::NetworkIdle,
1674
+ JsBrowserWait::Selector => Self::Selector,
1675
+ JsBrowserWait::Fixed => Self::Fixed,
1676
+ }
1677
+ }
1678
+ }
1679
+
1680
+ impl From<kreuzcrawl::BrowserWait> for JsBrowserWait {
1681
+ fn from(val: kreuzcrawl::BrowserWait) -> Self {
1682
+ match val {
1683
+ kreuzcrawl::BrowserWait::NetworkIdle => Self::NetworkIdle,
1684
+ kreuzcrawl::BrowserWait::Selector => Self::Selector,
1685
+ kreuzcrawl::BrowserWait::Fixed => Self::Fixed,
1686
+ }
1687
+ }
1688
+ }
1689
+
1690
+ impl From<JsAuthConfig> for kreuzcrawl::AuthConfig {
1691
+ fn from(val: JsAuthConfig) -> Self {
1692
+ match val.type_tag.as_str() {
1693
+ "basic" => Self::Basic {
1694
+ username: val.username.unwrap_or_default(),
1695
+ password: val.password.unwrap_or_default(),
1696
+ },
1697
+ "bearer" => Self::Bearer {
1698
+ token: val.token.unwrap_or_default(),
1699
+ },
1700
+ "header" => Self::Header {
1701
+ name: val.name.unwrap_or_default(),
1702
+ value: val.value.unwrap_or_default(),
1703
+ },
1704
+ _ => Self::Basic {
1705
+ username: Default::default(),
1706
+ password: Default::default(),
1707
+ },
1708
+ }
1709
+ }
1710
+ }
1711
+
1712
+ impl From<kreuzcrawl::AuthConfig> for JsAuthConfig {
1713
+ fn from(val: kreuzcrawl::AuthConfig) -> Self {
1714
+ match val {
1715
+ kreuzcrawl::AuthConfig::Basic { username, password } => Self {
1716
+ type_tag: "basic".to_string(),
1717
+ name: None,
1718
+ password: Some(password),
1719
+ token: None,
1720
+ username: Some(username),
1721
+ value: None,
1722
+ },
1723
+ kreuzcrawl::AuthConfig::Bearer { token } => Self {
1724
+ type_tag: "bearer".to_string(),
1725
+ name: None,
1726
+ password: None,
1727
+ token: Some(token),
1728
+ username: None,
1729
+ value: None,
1730
+ },
1731
+ kreuzcrawl::AuthConfig::Header { name, value } => Self {
1732
+ type_tag: "header".to_string(),
1733
+ name: Some(name),
1734
+ password: None,
1735
+ token: None,
1736
+ username: None,
1737
+ value: Some(value),
1738
+ },
1739
+ }
1740
+ }
1741
+ }
1742
+
1743
+ impl From<JsLinkType> for kreuzcrawl::LinkType {
1744
+ fn from(val: JsLinkType) -> Self {
1745
+ match val {
1746
+ JsLinkType::Internal => Self::Internal,
1747
+ JsLinkType::External => Self::External,
1748
+ JsLinkType::Anchor => Self::Anchor,
1749
+ JsLinkType::Document => Self::Document,
1750
+ }
1751
+ }
1752
+ }
1753
+
1754
+ impl From<kreuzcrawl::LinkType> for JsLinkType {
1755
+ fn from(val: kreuzcrawl::LinkType) -> Self {
1756
+ match val {
1757
+ kreuzcrawl::LinkType::Internal => Self::Internal,
1758
+ kreuzcrawl::LinkType::External => Self::External,
1759
+ kreuzcrawl::LinkType::Anchor => Self::Anchor,
1760
+ kreuzcrawl::LinkType::Document => Self::Document,
1761
+ }
1762
+ }
1763
+ }
1764
+
1765
+ impl From<JsImageSource> for kreuzcrawl::ImageSource {
1766
+ fn from(val: JsImageSource) -> Self {
1767
+ match val {
1768
+ JsImageSource::Img => Self::Img,
1769
+ JsImageSource::PictureSource => Self::PictureSource,
1770
+ JsImageSource::OgImage => Self::OgImage,
1771
+ JsImageSource::TwitterImage => Self::TwitterImage,
1772
+ }
1773
+ }
1774
+ }
1775
+
1776
+ impl From<kreuzcrawl::ImageSource> for JsImageSource {
1777
+ fn from(val: kreuzcrawl::ImageSource) -> Self {
1778
+ match val {
1779
+ kreuzcrawl::ImageSource::Img => Self::Img,
1780
+ kreuzcrawl::ImageSource::PictureSource => Self::PictureSource,
1781
+ kreuzcrawl::ImageSource::OgImage => Self::OgImage,
1782
+ kreuzcrawl::ImageSource::TwitterImage => Self::TwitterImage,
1783
+ }
1784
+ }
1785
+ }
1786
+
1787
+ impl From<JsFeedType> for kreuzcrawl::FeedType {
1788
+ fn from(val: JsFeedType) -> Self {
1789
+ match val {
1790
+ JsFeedType::Rss => Self::Rss,
1791
+ JsFeedType::Atom => Self::Atom,
1792
+ JsFeedType::JsonFeed => Self::JsonFeed,
1793
+ }
1794
+ }
1795
+ }
1796
+
1797
+ impl From<kreuzcrawl::FeedType> for JsFeedType {
1798
+ fn from(val: kreuzcrawl::FeedType) -> Self {
1799
+ match val {
1800
+ kreuzcrawl::FeedType::Rss => Self::Rss,
1801
+ kreuzcrawl::FeedType::Atom => Self::Atom,
1802
+ kreuzcrawl::FeedType::JsonFeed => Self::JsonFeed,
1803
+ }
1804
+ }
1805
+ }
1806
+
1807
+ impl From<JsAssetCategory> for kreuzcrawl::AssetCategory {
1808
+ fn from(val: JsAssetCategory) -> Self {
1809
+ match val {
1810
+ JsAssetCategory::Document => Self::Document,
1811
+ JsAssetCategory::Image => Self::Image,
1812
+ JsAssetCategory::Audio => Self::Audio,
1813
+ JsAssetCategory::Video => Self::Video,
1814
+ JsAssetCategory::Font => Self::Font,
1815
+ JsAssetCategory::Stylesheet => Self::Stylesheet,
1816
+ JsAssetCategory::Script => Self::Script,
1817
+ JsAssetCategory::Archive => Self::Archive,
1818
+ JsAssetCategory::Data => Self::Data,
1819
+ JsAssetCategory::Other => Self::Other,
1820
+ }
1821
+ }
1822
+ }
1823
+
1824
+ impl From<kreuzcrawl::AssetCategory> for JsAssetCategory {
1825
+ fn from(val: kreuzcrawl::AssetCategory) -> Self {
1826
+ match val {
1827
+ kreuzcrawl::AssetCategory::Document => Self::Document,
1828
+ kreuzcrawl::AssetCategory::Image => Self::Image,
1829
+ kreuzcrawl::AssetCategory::Audio => Self::Audio,
1830
+ kreuzcrawl::AssetCategory::Video => Self::Video,
1831
+ kreuzcrawl::AssetCategory::Font => Self::Font,
1832
+ kreuzcrawl::AssetCategory::Stylesheet => Self::Stylesheet,
1833
+ kreuzcrawl::AssetCategory::Script => Self::Script,
1834
+ kreuzcrawl::AssetCategory::Archive => Self::Archive,
1835
+ kreuzcrawl::AssetCategory::Data => Self::Data,
1836
+ kreuzcrawl::AssetCategory::Other => Self::Other,
1837
+ }
1838
+ }
1839
+ }
1840
+
1841
+ impl From<kreuzcrawl::CrawlEvent> for JsCrawlEvent {
1842
+ fn from(val: kreuzcrawl::CrawlEvent) -> Self {
1843
+ match val {
1844
+ kreuzcrawl::CrawlEvent::Page(..) => Self::Page,
1845
+ kreuzcrawl::CrawlEvent::Error { .. } => Self::Error,
1846
+ kreuzcrawl::CrawlEvent::Complete { .. } => Self::Complete,
1847
+ }
1848
+ }
1849
+ }
1850
+
1851
// Error variant name constants.
//
// Each constant holds the name of one error variant as a string.

/// Name of the `NotFound` error variant.
pub const CRAWL_ERROR_ERROR_NOT_FOUND: &str = "NotFound";
/// Name of the `Unauthorized` error variant.
pub const CRAWL_ERROR_ERROR_UNAUTHORIZED: &str = "Unauthorized";
/// Name of the `Forbidden` error variant.
pub const CRAWL_ERROR_ERROR_FORBIDDEN: &str = "Forbidden";
/// Name of the `WafBlocked` error variant.
pub const CRAWL_ERROR_ERROR_WAF_BLOCKED: &str = "WafBlocked";
/// Name of the `Timeout` error variant.
pub const CRAWL_ERROR_ERROR_TIMEOUT: &str = "Timeout";
/// Name of the `RateLimited` error variant.
pub const CRAWL_ERROR_ERROR_RATE_LIMITED: &str = "RateLimited";
/// Name of the `ServerError` error variant.
pub const CRAWL_ERROR_ERROR_SERVER_ERROR: &str = "ServerError";
/// Name of the `BadGateway` error variant.
pub const CRAWL_ERROR_ERROR_BAD_GATEWAY: &str = "BadGateway";
/// Name of the `Gone` error variant.
pub const CRAWL_ERROR_ERROR_GONE: &str = "Gone";
/// Name of the `Connection` error variant.
pub const CRAWL_ERROR_ERROR_CONNECTION: &str = "Connection";
/// Name of the `Dns` error variant.
pub const CRAWL_ERROR_ERROR_DNS: &str = "Dns";
/// Name of the `Ssl` error variant.
pub const CRAWL_ERROR_ERROR_SSL: &str = "Ssl";
/// Name of the `DataLoss` error variant.
pub const CRAWL_ERROR_ERROR_DATA_LOSS: &str = "DataLoss";
/// Name of the `BrowserError` error variant.
pub const CRAWL_ERROR_ERROR_BROWSER_ERROR: &str = "BrowserError";
/// Name of the `BrowserTimeout` error variant.
pub const CRAWL_ERROR_ERROR_BROWSER_TIMEOUT: &str = "BrowserTimeout";
/// Name of the `InvalidConfig` error variant.
pub const CRAWL_ERROR_ERROR_INVALID_CONFIG: &str = "InvalidConfig";
/// Name of the `Other` error variant.
pub const CRAWL_ERROR_ERROR_OTHER: &str = "Other";
1869
+
1870
+ /// Convert a `kreuzcrawl::CrawlError` error to a NAPI error.
1871
+ #[allow(dead_code)]
1872
+ fn crawl_error_to_napi_err(e: kreuzcrawl::CrawlError) -> napi::Error {
1873
+ let msg = e.to_string();
1874
+ #[allow(unreachable_patterns)]
1875
+ match &e {
1876
+ kreuzcrawl::CrawlError::NotFound(..) => {
1877
+ napi::Error::new(napi::Status::GenericFailure, format!("[NotFound] {}", msg))
1878
+ }
1879
+ kreuzcrawl::CrawlError::Unauthorized(..) => {
1880
+ napi::Error::new(napi::Status::GenericFailure, format!("[Unauthorized] {}", msg))
1881
+ }
1882
+ kreuzcrawl::CrawlError::Forbidden(..) => {
1883
+ napi::Error::new(napi::Status::GenericFailure, format!("[Forbidden] {}", msg))
1884
+ }
1885
+ kreuzcrawl::CrawlError::WafBlocked(..) => {
1886
+ napi::Error::new(napi::Status::GenericFailure, format!("[WafBlocked] {}", msg))
1887
+ }
1888
+ kreuzcrawl::CrawlError::Timeout(..) => {
1889
+ napi::Error::new(napi::Status::GenericFailure, format!("[Timeout] {}", msg))
1890
+ }
1891
+ kreuzcrawl::CrawlError::RateLimited(..) => {
1892
+ napi::Error::new(napi::Status::GenericFailure, format!("[RateLimited] {}", msg))
1893
+ }
1894
+ kreuzcrawl::CrawlError::ServerError(..) => {
1895
+ napi::Error::new(napi::Status::GenericFailure, format!("[ServerError] {}", msg))
1896
+ }
1897
+ kreuzcrawl::CrawlError::BadGateway(..) => {
1898
+ napi::Error::new(napi::Status::GenericFailure, format!("[BadGateway] {}", msg))
1899
+ }
1900
+ kreuzcrawl::CrawlError::Gone(..) => napi::Error::new(napi::Status::GenericFailure, format!("[Gone] {}", msg)),
1901
+ kreuzcrawl::CrawlError::Connection(..) => {
1902
+ napi::Error::new(napi::Status::GenericFailure, format!("[Connection] {}", msg))
1903
+ }
1904
+ kreuzcrawl::CrawlError::Dns(..) => napi::Error::new(napi::Status::GenericFailure, format!("[Dns] {}", msg)),
1905
+ kreuzcrawl::CrawlError::Ssl(..) => napi::Error::new(napi::Status::GenericFailure, format!("[Ssl] {}", msg)),
1906
+ kreuzcrawl::CrawlError::DataLoss(..) => {
1907
+ napi::Error::new(napi::Status::GenericFailure, format!("[DataLoss] {}", msg))
1908
+ }
1909
+ kreuzcrawl::CrawlError::BrowserError(..) => {
1910
+ napi::Error::new(napi::Status::GenericFailure, format!("[BrowserError] {}", msg))
1911
+ }
1912
+ kreuzcrawl::CrawlError::BrowserTimeout(..) => {
1913
+ napi::Error::new(napi::Status::GenericFailure, format!("[BrowserTimeout] {}", msg))
1914
+ }
1915
+ kreuzcrawl::CrawlError::InvalidConfig(..) => {
1916
+ napi::Error::new(napi::Status::GenericFailure, format!("[InvalidConfig] {}", msg))
1917
+ }
1918
+ kreuzcrawl::CrawlError::Other(..) => napi::Error::new(napi::Status::GenericFailure, format!("[Other] {}", msg)),
1919
+ _ => napi::Error::new(napi::Status::GenericFailure, msg),
1920
+ }
1921
+ }