kreuzcrawl 0.1.0.pre.rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4835 @@
1
+ // This file is auto-generated by alef. DO NOT EDIT.
2
+ // Re-generate with: alef generate
3
+ #![allow(dead_code)]
4
+
5
+ use magnus::{Error, IntoValueFromNative, Ruby, function, method, prelude::*, try_convert::TryConvertOwned};
6
+ use std::collections::HashMap;
7
+ use std::sync::Arc;
8
+
9
+ fn json_to_ruby(handle: &Ruby, val: serde_json::Value) -> magnus::Value {
10
+ use magnus::IntoValue;
11
+ match val {
12
+ serde_json::Value::Null => handle.qnil().into_value_with(handle),
13
+ serde_json::Value::Bool(b) => b.into_value_with(handle),
14
+ serde_json::Value::Number(n) => {
15
+ if let Some(i) = n.as_i64() {
16
+ i.into_value_with(handle)
17
+ } else if let Some(f) = n.as_f64() {
18
+ f.into_value_with(handle)
19
+ } else {
20
+ handle.qnil().into_value_with(handle)
21
+ }
22
+ }
23
+ serde_json::Value::String(s) => s.into_value_with(handle),
24
+ serde_json::Value::Array(arr) => {
25
+ let ruby_arr = handle.ary_new_capa(arr.len());
26
+ for item in arr {
27
+ let _ = ruby_arr.push(json_to_ruby(handle, item));
28
+ }
29
+ ruby_arr.into_value_with(handle)
30
+ }
31
+ serde_json::Value::Object(map) => {
32
+ let hash = handle.hash_new();
33
+ for (k, v) in map {
34
+ let key = handle.to_symbol(&k);
35
+ let val = json_to_ruby(handle, v);
36
+ let _ = hash.aset(key, val);
37
+ }
38
+ hash.into_value_with(handle)
39
+ }
40
+ }
41
+ }
42
+
43
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
44
+ #[magnus::wrap(class = "Kreuzcrawl::ExtractionMeta")]
45
+ #[serde(default)]
46
+ pub struct ExtractionMeta {
47
+ pub cost: Option<f64>,
48
+ pub prompt_tokens: Option<u64>,
49
+ pub completion_tokens: Option<u64>,
50
+ pub model: Option<String>,
51
+ pub chunks_processed: usize,
52
+ }
53
+
54
+ unsafe impl IntoValueFromNative for ExtractionMeta {}
55
+
56
+ impl magnus::TryConvert for ExtractionMeta {
57
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
58
+ let r: &ExtractionMeta = magnus::TryConvert::try_convert(val)?;
59
+ Ok(r.clone())
60
+ }
61
+ }
62
+ unsafe impl TryConvertOwned for ExtractionMeta {}
63
+
64
+ impl Default for ExtractionMeta {
65
+ fn default() -> Self {
66
+ Self {
67
+ cost: Default::default(),
68
+ prompt_tokens: Default::default(),
69
+ completion_tokens: Default::default(),
70
+ model: Default::default(),
71
+ chunks_processed: Default::default(),
72
+ }
73
+ }
74
+ }
75
+
76
+ impl ExtractionMeta {
77
+ fn new(
78
+ cost: Option<f64>,
79
+ prompt_tokens: Option<u64>,
80
+ completion_tokens: Option<u64>,
81
+ model: Option<String>,
82
+ chunks_processed: Option<usize>,
83
+ ) -> Self {
84
+ Self {
85
+ cost,
86
+ prompt_tokens,
87
+ completion_tokens,
88
+ model,
89
+ chunks_processed: chunks_processed.unwrap_or_default(),
90
+ }
91
+ }
92
+
93
+ fn cost(&self) -> Option<f64> {
94
+ self.cost
95
+ }
96
+
97
+ fn prompt_tokens(&self) -> Option<u64> {
98
+ self.prompt_tokens
99
+ }
100
+
101
+ fn completion_tokens(&self) -> Option<u64> {
102
+ self.completion_tokens
103
+ }
104
+
105
+ fn model(&self) -> Option<String> {
106
+ self.model.clone()
107
+ }
108
+
109
+ fn chunks_processed(&self) -> usize {
110
+ self.chunks_processed
111
+ }
112
+ }
113
+
114
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
115
+ #[magnus::wrap(class = "Kreuzcrawl::ProxyConfig")]
116
+ #[serde(default)]
117
+ pub struct ProxyConfig {
118
+ pub url: String,
119
+ pub username: Option<String>,
120
+ pub password: Option<String>,
121
+ }
122
+
123
+ unsafe impl IntoValueFromNative for ProxyConfig {}
124
+
125
+ impl magnus::TryConvert for ProxyConfig {
126
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
127
+ let r: &ProxyConfig = magnus::TryConvert::try_convert(val)?;
128
+ Ok(r.clone())
129
+ }
130
+ }
131
+ unsafe impl TryConvertOwned for ProxyConfig {}
132
+
133
+ impl Default for ProxyConfig {
134
+ fn default() -> Self {
135
+ Self {
136
+ url: Default::default(),
137
+ username: Default::default(),
138
+ password: Default::default(),
139
+ }
140
+ }
141
+ }
142
+
143
+ impl ProxyConfig {
144
+ fn new(url: Option<String>, username: Option<String>, password: Option<String>) -> Self {
145
+ Self {
146
+ url: url.unwrap_or_default(),
147
+ username,
148
+ password,
149
+ }
150
+ }
151
+
152
+ fn url(&self) -> String {
153
+ self.url.clone()
154
+ }
155
+
156
+ fn username(&self) -> Option<String> {
157
+ self.username.clone()
158
+ }
159
+
160
+ fn password(&self) -> Option<String> {
161
+ self.password.clone()
162
+ }
163
+ }
164
+
165
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
166
+ #[magnus::wrap(class = "Kreuzcrawl::BrowserConfig")]
167
+ #[serde(default)]
168
+ pub struct BrowserConfig {
169
+ pub mode: BrowserMode,
170
+ pub endpoint: Option<String>,
171
+ pub timeout: u64,
172
+ pub wait: BrowserWait,
173
+ pub wait_selector: Option<String>,
174
+ pub extra_wait: Option<u64>,
175
+ }
176
+
177
+ unsafe impl IntoValueFromNative for BrowserConfig {}
178
+
179
+ impl magnus::TryConvert for BrowserConfig {
180
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
181
+ let r: &BrowserConfig = magnus::TryConvert::try_convert(val)?;
182
+ Ok(r.clone())
183
+ }
184
+ }
185
+ unsafe impl TryConvertOwned for BrowserConfig {}
186
+
187
+ impl BrowserConfig {
188
+ fn new(
189
+ mode: Option<BrowserMode>,
190
+ endpoint: Option<String>,
191
+ timeout: Option<u64>,
192
+ wait: Option<BrowserWait>,
193
+ wait_selector: Option<String>,
194
+ extra_wait: Option<u64>,
195
+ ) -> Self {
196
+ Self {
197
+ mode: mode.unwrap_or(BrowserMode::Auto),
198
+ endpoint,
199
+ timeout: timeout.unwrap_or(30000),
200
+ wait: wait.unwrap_or_default(),
201
+ wait_selector,
202
+ extra_wait,
203
+ }
204
+ }
205
+
206
+ fn mode(&self) -> BrowserMode {
207
+ self.mode.clone()
208
+ }
209
+
210
+ fn endpoint(&self) -> Option<String> {
211
+ self.endpoint.clone()
212
+ }
213
+
214
+ fn timeout(&self) -> u64 {
215
+ self.timeout.clone()
216
+ }
217
+
218
+ fn wait(&self) -> BrowserWait {
219
+ self.wait.clone()
220
+ }
221
+
222
+ fn wait_selector(&self) -> Option<String> {
223
+ self.wait_selector.clone()
224
+ }
225
+
226
+ fn extra_wait(&self) -> Option<u64> {
227
+ self.extra_wait.clone()
228
+ }
229
+ }
230
+
231
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
232
+ #[magnus::wrap(class = "Kreuzcrawl::CrawlConfig")]
233
+ #[serde(default)]
234
+ pub struct CrawlConfig {
235
+ pub max_depth: Option<usize>,
236
+ pub max_pages: Option<usize>,
237
+ pub max_concurrent: Option<usize>,
238
+ pub respect_robots_txt: bool,
239
+ pub user_agent: Option<String>,
240
+ pub stay_on_domain: bool,
241
+ pub allow_subdomains: bool,
242
+ pub include_paths: Vec<String>,
243
+ pub exclude_paths: Vec<String>,
244
+ pub custom_headers: HashMap<String, String>,
245
+ pub request_timeout: u64,
246
+ pub max_redirects: usize,
247
+ pub retry_count: usize,
248
+ pub retry_codes: Vec<u16>,
249
+ pub cookies_enabled: bool,
250
+ pub auth: Option<AuthConfig>,
251
+ pub max_body_size: Option<usize>,
252
+ pub main_content_only: bool,
253
+ pub remove_tags: Vec<String>,
254
+ pub map_limit: Option<usize>,
255
+ pub map_search: Option<String>,
256
+ pub download_assets: bool,
257
+ pub asset_types: Vec<AssetCategory>,
258
+ pub max_asset_size: Option<usize>,
259
+ pub browser: BrowserConfig,
260
+ pub proxy: Option<ProxyConfig>,
261
+ pub user_agents: Vec<String>,
262
+ pub capture_screenshot: bool,
263
+ pub download_documents: bool,
264
+ pub document_max_size: Option<usize>,
265
+ pub document_mime_types: Vec<String>,
266
+ pub warc_output: Option<String>,
267
+ pub browser_profile: Option<String>,
268
+ pub save_browser_profile: bool,
269
+ }
270
+
271
+ unsafe impl IntoValueFromNative for CrawlConfig {}
272
+
273
+ impl magnus::TryConvert for CrawlConfig {
274
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
275
+ let r: &CrawlConfig = magnus::TryConvert::try_convert(val)?;
276
+ Ok(r.clone())
277
+ }
278
+ }
279
+ unsafe impl TryConvertOwned for CrawlConfig {}
280
+
281
+ impl CrawlConfig {
282
+ fn new(kwargs: magnus::RHash) -> Result<Self, magnus::Error> {
283
+ let ruby = unsafe { magnus::Ruby::get_unchecked() };
284
+ Ok(Self {
285
+ max_depth: kwargs
286
+ .get(ruby.to_symbol("max_depth"))
287
+ .and_then(|v| usize::try_convert(v).ok()),
288
+ max_pages: kwargs
289
+ .get(ruby.to_symbol("max_pages"))
290
+ .and_then(|v| usize::try_convert(v).ok()),
291
+ max_concurrent: kwargs
292
+ .get(ruby.to_symbol("max_concurrent"))
293
+ .and_then(|v| usize::try_convert(v).ok()),
294
+ respect_robots_txt: kwargs
295
+ .get(ruby.to_symbol("respect_robots_txt"))
296
+ .and_then(|v| bool::try_convert(v).ok())
297
+ .unwrap_or(false),
298
+ user_agent: kwargs
299
+ .get(ruby.to_symbol("user_agent"))
300
+ .and_then(|v| String::try_convert(v).ok()),
301
+ stay_on_domain: kwargs
302
+ .get(ruby.to_symbol("stay_on_domain"))
303
+ .and_then(|v| bool::try_convert(v).ok())
304
+ .unwrap_or(false),
305
+ allow_subdomains: kwargs
306
+ .get(ruby.to_symbol("allow_subdomains"))
307
+ .and_then(|v| bool::try_convert(v).ok())
308
+ .unwrap_or(false),
309
+ include_paths: kwargs
310
+ .get(ruby.to_symbol("include_paths"))
311
+ .and_then(|v| <Vec<String>>::try_convert(v).ok())
312
+ .unwrap_or_default(),
313
+ exclude_paths: kwargs
314
+ .get(ruby.to_symbol("exclude_paths"))
315
+ .and_then(|v| <Vec<String>>::try_convert(v).ok())
316
+ .unwrap_or_default(),
317
+ custom_headers: kwargs
318
+ .get(ruby.to_symbol("custom_headers"))
319
+ .and_then(|v| <HashMap<String, String>>::try_convert(v).ok())
320
+ .unwrap_or_default(),
321
+ request_timeout: kwargs
322
+ .get(ruby.to_symbol("request_timeout"))
323
+ .and_then(|v| u64::try_convert(v).ok())
324
+ .unwrap_or(30000),
325
+ max_redirects: kwargs
326
+ .get(ruby.to_symbol("max_redirects"))
327
+ .and_then(|v| usize::try_convert(v).ok())
328
+ .unwrap_or(10),
329
+ retry_count: kwargs
330
+ .get(ruby.to_symbol("retry_count"))
331
+ .and_then(|v| usize::try_convert(v).ok())
332
+ .unwrap_or(0),
333
+ retry_codes: kwargs
334
+ .get(ruby.to_symbol("retry_codes"))
335
+ .and_then(|v| <Vec<u16>>::try_convert(v).ok())
336
+ .unwrap_or_default(),
337
+ cookies_enabled: kwargs
338
+ .get(ruby.to_symbol("cookies_enabled"))
339
+ .and_then(|v| bool::try_convert(v).ok())
340
+ .unwrap_or(false),
341
+ auth: kwargs
342
+ .get(ruby.to_symbol("auth"))
343
+ .and_then(|v| AuthConfig::try_convert(v).ok()),
344
+ max_body_size: kwargs
345
+ .get(ruby.to_symbol("max_body_size"))
346
+ .and_then(|v| usize::try_convert(v).ok()),
347
+ main_content_only: kwargs
348
+ .get(ruby.to_symbol("main_content_only"))
349
+ .and_then(|v| bool::try_convert(v).ok())
350
+ .unwrap_or(false),
351
+ remove_tags: kwargs
352
+ .get(ruby.to_symbol("remove_tags"))
353
+ .and_then(|v| <Vec<String>>::try_convert(v).ok())
354
+ .unwrap_or_default(),
355
+ map_limit: kwargs
356
+ .get(ruby.to_symbol("map_limit"))
357
+ .and_then(|v| usize::try_convert(v).ok()),
358
+ map_search: kwargs
359
+ .get(ruby.to_symbol("map_search"))
360
+ .and_then(|v| String::try_convert(v).ok()),
361
+ download_assets: kwargs
362
+ .get(ruby.to_symbol("download_assets"))
363
+ .and_then(|v| bool::try_convert(v).ok())
364
+ .unwrap_or(false),
365
+ asset_types: kwargs
366
+ .get(ruby.to_symbol("asset_types"))
367
+ .and_then(|v| <Vec<AssetCategory>>::try_convert(v).ok())
368
+ .unwrap_or_default(),
369
+ max_asset_size: kwargs
370
+ .get(ruby.to_symbol("max_asset_size"))
371
+ .and_then(|v| usize::try_convert(v).ok()),
372
+ browser: kwargs
373
+ .get(ruby.to_symbol("browser"))
374
+ .and_then(|v| BrowserConfig::try_convert(v).ok())
375
+ .unwrap_or_default(),
376
+ proxy: kwargs
377
+ .get(ruby.to_symbol("proxy"))
378
+ .and_then(|v| ProxyConfig::try_convert(v).ok()),
379
+ user_agents: kwargs
380
+ .get(ruby.to_symbol("user_agents"))
381
+ .and_then(|v| <Vec<String>>::try_convert(v).ok())
382
+ .unwrap_or_default(),
383
+ capture_screenshot: kwargs
384
+ .get(ruby.to_symbol("capture_screenshot"))
385
+ .and_then(|v| bool::try_convert(v).ok())
386
+ .unwrap_or(false),
387
+ download_documents: kwargs
388
+ .get(ruby.to_symbol("download_documents"))
389
+ .and_then(|v| bool::try_convert(v).ok())
390
+ .unwrap_or(true),
391
+ document_max_size: kwargs
392
+ .get(ruby.to_symbol("document_max_size"))
393
+ .and_then(|v| usize::try_convert(v).ok()),
394
+ document_mime_types: kwargs
395
+ .get(ruby.to_symbol("document_mime_types"))
396
+ .and_then(|v| <Vec<String>>::try_convert(v).ok())
397
+ .unwrap_or_default(),
398
+ warc_output: kwargs
399
+ .get(ruby.to_symbol("warc_output"))
400
+ .and_then(|v| String::try_convert(v).ok()),
401
+ browser_profile: kwargs
402
+ .get(ruby.to_symbol("browser_profile"))
403
+ .and_then(|v| String::try_convert(v).ok()),
404
+ save_browser_profile: kwargs
405
+ .get(ruby.to_symbol("save_browser_profile"))
406
+ .and_then(|v| bool::try_convert(v).ok())
407
+ .unwrap_or(false),
408
+ })
409
+ }
410
+
411
+ fn max_depth(&self) -> Option<usize> {
412
+ self.max_depth
413
+ }
414
+
415
+ fn max_pages(&self) -> Option<usize> {
416
+ self.max_pages
417
+ }
418
+
419
+ fn max_concurrent(&self) -> Option<usize> {
420
+ self.max_concurrent
421
+ }
422
+
423
+ fn respect_robots_txt(&self) -> bool {
424
+ self.respect_robots_txt
425
+ }
426
+
427
+ fn user_agent(&self) -> Option<String> {
428
+ self.user_agent.clone()
429
+ }
430
+
431
+ fn stay_on_domain(&self) -> bool {
432
+ self.stay_on_domain
433
+ }
434
+
435
+ fn allow_subdomains(&self) -> bool {
436
+ self.allow_subdomains
437
+ }
438
+
439
+ fn include_paths(&self) -> Vec<String> {
440
+ self.include_paths.clone()
441
+ }
442
+
443
+ fn exclude_paths(&self) -> Vec<String> {
444
+ self.exclude_paths.clone()
445
+ }
446
+
447
+ fn custom_headers(&self) -> HashMap<String, String> {
448
+ self.custom_headers.clone()
449
+ }
450
+
451
+ fn request_timeout(&self) -> u64 {
452
+ self.request_timeout.clone()
453
+ }
454
+
455
+ fn max_redirects(&self) -> usize {
456
+ self.max_redirects
457
+ }
458
+
459
+ fn retry_count(&self) -> usize {
460
+ self.retry_count
461
+ }
462
+
463
+ fn retry_codes(&self) -> Vec<u16> {
464
+ self.retry_codes.clone()
465
+ }
466
+
467
+ fn cookies_enabled(&self) -> bool {
468
+ self.cookies_enabled
469
+ }
470
+
471
+ fn auth(&self) -> Option<AuthConfig> {
472
+ self.auth.clone()
473
+ }
474
+
475
+ fn max_body_size(&self) -> Option<usize> {
476
+ self.max_body_size
477
+ }
478
+
479
+ fn main_content_only(&self) -> bool {
480
+ self.main_content_only
481
+ }
482
+
483
+ fn remove_tags(&self) -> Vec<String> {
484
+ self.remove_tags.clone()
485
+ }
486
+
487
+ fn map_limit(&self) -> Option<usize> {
488
+ self.map_limit
489
+ }
490
+
491
+ fn map_search(&self) -> Option<String> {
492
+ self.map_search.clone()
493
+ }
494
+
495
+ fn download_assets(&self) -> bool {
496
+ self.download_assets
497
+ }
498
+
499
+ fn asset_types(&self) -> Vec<AssetCategory> {
500
+ self.asset_types.clone()
501
+ }
502
+
503
+ fn max_asset_size(&self) -> Option<usize> {
504
+ self.max_asset_size
505
+ }
506
+
507
+ fn browser(&self) -> BrowserConfig {
508
+ self.browser.clone()
509
+ }
510
+
511
+ fn proxy(&self) -> Option<ProxyConfig> {
512
+ self.proxy.clone()
513
+ }
514
+
515
+ fn user_agents(&self) -> Vec<String> {
516
+ self.user_agents.clone()
517
+ }
518
+
519
+ fn capture_screenshot(&self) -> bool {
520
+ self.capture_screenshot
521
+ }
522
+
523
+ fn download_documents(&self) -> bool {
524
+ self.download_documents
525
+ }
526
+
527
+ fn document_max_size(&self) -> Option<usize> {
528
+ self.document_max_size
529
+ }
530
+
531
+ fn document_mime_types(&self) -> Vec<String> {
532
+ self.document_mime_types.clone()
533
+ }
534
+
535
+ fn warc_output(&self) -> Option<String> {
536
+ self.warc_output.clone()
537
+ }
538
+
539
+ fn browser_profile(&self) -> Option<String> {
540
+ self.browser_profile.clone()
541
+ }
542
+
543
+ fn save_browser_profile(&self) -> bool {
544
+ self.save_browser_profile
545
+ }
546
+ }
547
+
548
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
549
+ #[magnus::wrap(class = "Kreuzcrawl::DownloadedDocument")]
550
+ #[serde(default)]
551
+ pub struct DownloadedDocument {
552
+ pub url: String,
553
+ pub mime_type: String,
554
+ pub content: Vec<u8>,
555
+ pub size: usize,
556
+ pub filename: Option<String>,
557
+ pub content_hash: String,
558
+ pub headers: HashMap<String, String>,
559
+ }
560
+
561
+ unsafe impl IntoValueFromNative for DownloadedDocument {}
562
+
563
+ impl magnus::TryConvert for DownloadedDocument {
564
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
565
+ let r: &DownloadedDocument = magnus::TryConvert::try_convert(val)?;
566
+ Ok(r.clone())
567
+ }
568
+ }
569
+ unsafe impl TryConvertOwned for DownloadedDocument {}
570
+
571
+ impl Default for DownloadedDocument {
572
+ fn default() -> Self {
573
+ Self {
574
+ url: Default::default(),
575
+ mime_type: Default::default(),
576
+ content: Default::default(),
577
+ size: Default::default(),
578
+ filename: Default::default(),
579
+ content_hash: Default::default(),
580
+ headers: Default::default(),
581
+ }
582
+ }
583
+ }
584
+
585
+ impl DownloadedDocument {
586
+ fn new(
587
+ url: Option<String>,
588
+ mime_type: Option<String>,
589
+ content: Option<Vec<u8>>,
590
+ size: Option<usize>,
591
+ filename: Option<String>,
592
+ content_hash: Option<String>,
593
+ headers: Option<HashMap<String, String>>,
594
+ ) -> Self {
595
+ Self {
596
+ url: url.unwrap_or_default(),
597
+ mime_type: mime_type.unwrap_or_default(),
598
+ content: content.unwrap_or_default(),
599
+ size: size.unwrap_or_default(),
600
+ filename,
601
+ content_hash: content_hash.unwrap_or_default(),
602
+ headers: headers.unwrap_or_default(),
603
+ }
604
+ }
605
+
606
+ fn url(&self) -> String {
607
+ self.url.clone()
608
+ }
609
+
610
+ fn mime_type(&self) -> String {
611
+ self.mime_type.clone()
612
+ }
613
+
614
+ fn content(&self) -> Vec<u8> {
615
+ self.content.clone()
616
+ }
617
+
618
+ fn size(&self) -> usize {
619
+ self.size
620
+ }
621
+
622
+ fn filename(&self) -> Option<String> {
623
+ self.filename.clone()
624
+ }
625
+
626
+ fn content_hash(&self) -> String {
627
+ self.content_hash.clone()
628
+ }
629
+
630
+ fn headers(&self) -> HashMap<String, String> {
631
+ self.headers.clone()
632
+ }
633
+ }
634
+
635
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
636
+ #[magnus::wrap(class = "Kreuzcrawl::InteractionResult")]
637
+ #[serde(default)]
638
+ pub struct InteractionResult {
639
+ pub action_results: Vec<ActionResult>,
640
+ pub final_html: String,
641
+ pub final_url: String,
642
+ pub screenshot: Option<Vec<u8>>,
643
+ }
644
+
645
+ unsafe impl IntoValueFromNative for InteractionResult {}
646
+
647
+ impl magnus::TryConvert for InteractionResult {
648
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
649
+ let r: &InteractionResult = magnus::TryConvert::try_convert(val)?;
650
+ Ok(r.clone())
651
+ }
652
+ }
653
+ unsafe impl TryConvertOwned for InteractionResult {}
654
+
655
+ impl Default for InteractionResult {
656
+ fn default() -> Self {
657
+ Self {
658
+ action_results: Default::default(),
659
+ final_html: Default::default(),
660
+ final_url: Default::default(),
661
+ screenshot: Default::default(),
662
+ }
663
+ }
664
+ }
665
+
666
+ impl InteractionResult {
667
+ fn new(
668
+ action_results: Option<Vec<ActionResult>>,
669
+ final_html: Option<String>,
670
+ final_url: Option<String>,
671
+ screenshot: Option<Vec<u8>>,
672
+ ) -> Self {
673
+ Self {
674
+ action_results: action_results.unwrap_or_default(),
675
+ final_html: final_html.unwrap_or_default(),
676
+ final_url: final_url.unwrap_or_default(),
677
+ screenshot,
678
+ }
679
+ }
680
+
681
+ fn action_results(&self) -> Vec<ActionResult> {
682
+ self.action_results.clone()
683
+ }
684
+
685
+ fn final_html(&self) -> String {
686
+ self.final_html.clone()
687
+ }
688
+
689
+ fn final_url(&self) -> String {
690
+ self.final_url.clone()
691
+ }
692
+
693
+ fn screenshot(&self) -> Option<Vec<u8>> {
694
+ self.screenshot.clone()
695
+ }
696
+ }
697
+
698
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
699
+ #[magnus::wrap(class = "Kreuzcrawl::ActionResult")]
700
+ #[serde(default)]
701
+ pub struct ActionResult {
702
+ pub action_index: usize,
703
+ pub action_type: String,
704
+ pub success: bool,
705
+ pub data: Option<String>,
706
+ pub error: Option<String>,
707
+ }
708
+
709
+ unsafe impl IntoValueFromNative for ActionResult {}
710
+
711
+ impl magnus::TryConvert for ActionResult {
712
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
713
+ let r: &ActionResult = magnus::TryConvert::try_convert(val)?;
714
+ Ok(r.clone())
715
+ }
716
+ }
717
+ unsafe impl TryConvertOwned for ActionResult {}
718
+
719
+ impl Default for ActionResult {
720
+ fn default() -> Self {
721
+ Self {
722
+ action_index: Default::default(),
723
+ action_type: Default::default(),
724
+ success: Default::default(),
725
+ data: Default::default(),
726
+ error: Default::default(),
727
+ }
728
+ }
729
+ }
730
+
731
+ impl ActionResult {
732
+ fn new(
733
+ action_index: Option<usize>,
734
+ action_type: Option<String>,
735
+ success: Option<bool>,
736
+ data: Option<String>,
737
+ error: Option<String>,
738
+ ) -> Self {
739
+ Self {
740
+ action_index: action_index.unwrap_or_default(),
741
+ action_type: action_type.unwrap_or_default(),
742
+ success: success.unwrap_or_default(),
743
+ data,
744
+ error,
745
+ }
746
+ }
747
+
748
+ fn action_index(&self) -> usize {
749
+ self.action_index
750
+ }
751
+
752
+ fn action_type(&self) -> String {
753
+ self.action_type.clone()
754
+ }
755
+
756
+ fn success(&self) -> bool {
757
+ self.success
758
+ }
759
+
760
+ fn data(&self) -> Option<String> {
761
+ self.data.clone()
762
+ }
763
+
764
+ fn error(&self) -> Option<String> {
765
+ self.error.clone()
766
+ }
767
+ }
768
+
769
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
770
+ #[magnus::wrap(class = "Kreuzcrawl::ScrapeResult")]
771
+ #[serde(default)]
772
+ pub struct ScrapeResult {
773
+ pub status_code: u16,
774
+ pub content_type: String,
775
+ pub html: String,
776
+ pub body_size: usize,
777
+ pub metadata: PageMetadata,
778
+ pub links: Vec<LinkInfo>,
779
+ pub images: Vec<ImageInfo>,
780
+ pub feeds: Vec<FeedInfo>,
781
+ pub json_ld: Vec<JsonLdEntry>,
782
+ pub is_allowed: bool,
783
+ pub crawl_delay: Option<u64>,
784
+ pub noindex_detected: bool,
785
+ pub nofollow_detected: bool,
786
+ pub x_robots_tag: Option<String>,
787
+ pub is_pdf: bool,
788
+ pub was_skipped: bool,
789
+ pub detected_charset: Option<String>,
790
+ pub main_content_only: bool,
791
+ pub auth_header_sent: bool,
792
+ pub response_meta: Option<ResponseMeta>,
793
+ pub assets: Vec<DownloadedAsset>,
794
+ pub js_render_hint: bool,
795
+ pub browser_used: bool,
796
+ pub markdown: Option<MarkdownResult>,
797
+ pub extracted_data: Option<String>,
798
+ pub extraction_meta: Option<ExtractionMeta>,
799
+ pub screenshot: Option<Vec<u8>>,
800
+ pub downloaded_document: Option<DownloadedDocument>,
801
+ }
802
+
803
+ unsafe impl IntoValueFromNative for ScrapeResult {}
804
+
805
+ impl magnus::TryConvert for ScrapeResult {
806
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
807
+ let r: &ScrapeResult = magnus::TryConvert::try_convert(val)?;
808
+ Ok(r.clone())
809
+ }
810
+ }
811
+ unsafe impl TryConvertOwned for ScrapeResult {}
812
+
813
+ impl Default for ScrapeResult {
814
+ fn default() -> Self {
815
+ Self {
816
+ status_code: Default::default(),
817
+ content_type: Default::default(),
818
+ html: Default::default(),
819
+ body_size: Default::default(),
820
+ metadata: Default::default(),
821
+ links: Default::default(),
822
+ images: Default::default(),
823
+ feeds: Default::default(),
824
+ json_ld: Default::default(),
825
+ is_allowed: Default::default(),
826
+ crawl_delay: Default::default(),
827
+ noindex_detected: Default::default(),
828
+ nofollow_detected: Default::default(),
829
+ x_robots_tag: Default::default(),
830
+ is_pdf: Default::default(),
831
+ was_skipped: Default::default(),
832
+ detected_charset: Default::default(),
833
+ main_content_only: Default::default(),
834
+ auth_header_sent: Default::default(),
835
+ response_meta: Default::default(),
836
+ assets: Default::default(),
837
+ js_render_hint: Default::default(),
838
+ browser_used: Default::default(),
839
+ markdown: Default::default(),
840
+ extracted_data: Default::default(),
841
+ extraction_meta: Default::default(),
842
+ screenshot: Default::default(),
843
+ downloaded_document: Default::default(),
844
+ }
845
+ }
846
+ }
847
+
848
+ impl ScrapeResult {
849
+ fn new(kwargs: magnus::RHash) -> Result<Self, magnus::Error> {
850
+ let ruby = unsafe { magnus::Ruby::get_unchecked() };
851
+ Ok(Self {
852
+ status_code: kwargs
853
+ .get(ruby.to_symbol("status_code"))
854
+ .and_then(|v| u16::try_convert(v).ok())
855
+ .unwrap_or_default(),
856
+ content_type: kwargs
857
+ .get(ruby.to_symbol("content_type"))
858
+ .and_then(|v| String::try_convert(v).ok())
859
+ .unwrap_or_default(),
860
+ html: kwargs
861
+ .get(ruby.to_symbol("html"))
862
+ .and_then(|v| String::try_convert(v).ok())
863
+ .unwrap_or_default(),
864
+ body_size: kwargs
865
+ .get(ruby.to_symbol("body_size"))
866
+ .and_then(|v| usize::try_convert(v).ok())
867
+ .unwrap_or_default(),
868
+ metadata: kwargs
869
+ .get(ruby.to_symbol("metadata"))
870
+ .and_then(|v| PageMetadata::try_convert(v).ok())
871
+ .unwrap_or_default(),
872
+ links: kwargs
873
+ .get(ruby.to_symbol("links"))
874
+ .and_then(|v| <Vec<LinkInfo>>::try_convert(v).ok())
875
+ .unwrap_or_default(),
876
+ images: kwargs
877
+ .get(ruby.to_symbol("images"))
878
+ .and_then(|v| <Vec<ImageInfo>>::try_convert(v).ok())
879
+ .unwrap_or_default(),
880
+ feeds: kwargs
881
+ .get(ruby.to_symbol("feeds"))
882
+ .and_then(|v| <Vec<FeedInfo>>::try_convert(v).ok())
883
+ .unwrap_or_default(),
884
+ json_ld: kwargs
885
+ .get(ruby.to_symbol("json_ld"))
886
+ .and_then(|v| <Vec<JsonLdEntry>>::try_convert(v).ok())
887
+ .unwrap_or_default(),
888
+ is_allowed: kwargs
889
+ .get(ruby.to_symbol("is_allowed"))
890
+ .and_then(|v| bool::try_convert(v).ok())
891
+ .unwrap_or_default(),
892
+ crawl_delay: kwargs
893
+ .get(ruby.to_symbol("crawl_delay"))
894
+ .and_then(|v| u64::try_convert(v).ok()),
895
+ noindex_detected: kwargs
896
+ .get(ruby.to_symbol("noindex_detected"))
897
+ .and_then(|v| bool::try_convert(v).ok())
898
+ .unwrap_or_default(),
899
+ nofollow_detected: kwargs
900
+ .get(ruby.to_symbol("nofollow_detected"))
901
+ .and_then(|v| bool::try_convert(v).ok())
902
+ .unwrap_or_default(),
903
+ x_robots_tag: kwargs
904
+ .get(ruby.to_symbol("x_robots_tag"))
905
+ .and_then(|v| String::try_convert(v).ok()),
906
+ is_pdf: kwargs
907
+ .get(ruby.to_symbol("is_pdf"))
908
+ .and_then(|v| bool::try_convert(v).ok())
909
+ .unwrap_or_default(),
910
+ was_skipped: kwargs
911
+ .get(ruby.to_symbol("was_skipped"))
912
+ .and_then(|v| bool::try_convert(v).ok())
913
+ .unwrap_or_default(),
914
+ detected_charset: kwargs
915
+ .get(ruby.to_symbol("detected_charset"))
916
+ .and_then(|v| String::try_convert(v).ok()),
917
+ main_content_only: kwargs
918
+ .get(ruby.to_symbol("main_content_only"))
919
+ .and_then(|v| bool::try_convert(v).ok())
920
+ .unwrap_or_default(),
921
+ auth_header_sent: kwargs
922
+ .get(ruby.to_symbol("auth_header_sent"))
923
+ .and_then(|v| bool::try_convert(v).ok())
924
+ .unwrap_or_default(),
925
+ response_meta: kwargs
926
+ .get(ruby.to_symbol("response_meta"))
927
+ .and_then(|v| ResponseMeta::try_convert(v).ok()),
928
+ assets: kwargs
929
+ .get(ruby.to_symbol("assets"))
930
+ .and_then(|v| <Vec<DownloadedAsset>>::try_convert(v).ok())
931
+ .unwrap_or_default(),
932
+ js_render_hint: kwargs
933
+ .get(ruby.to_symbol("js_render_hint"))
934
+ .and_then(|v| bool::try_convert(v).ok())
935
+ .unwrap_or_default(),
936
+ browser_used: kwargs
937
+ .get(ruby.to_symbol("browser_used"))
938
+ .and_then(|v| bool::try_convert(v).ok())
939
+ .unwrap_or_default(),
940
+ markdown: kwargs
941
+ .get(ruby.to_symbol("markdown"))
942
+ .and_then(|v| MarkdownResult::try_convert(v).ok()),
943
+ extracted_data: kwargs
944
+ .get(ruby.to_symbol("extracted_data"))
945
+ .and_then(|v| String::try_convert(v).ok()),
946
+ extraction_meta: kwargs
947
+ .get(ruby.to_symbol("extraction_meta"))
948
+ .and_then(|v| ExtractionMeta::try_convert(v).ok()),
949
+ screenshot: kwargs
950
+ .get(ruby.to_symbol("screenshot"))
951
+ .and_then(|v| <Vec<u8>>::try_convert(v).ok()),
952
+ downloaded_document: kwargs
953
+ .get(ruby.to_symbol("downloaded_document"))
954
+ .and_then(|v| DownloadedDocument::try_convert(v).ok()),
955
+ })
956
+ }
957
+
958
+ fn status_code(&self) -> u16 {
959
+ self.status_code
960
+ }
961
+
962
+ fn content_type(&self) -> String {
963
+ self.content_type.clone()
964
+ }
965
+
966
+ fn html(&self) -> String {
967
+ self.html.clone()
968
+ }
969
+
970
+ fn body_size(&self) -> usize {
971
+ self.body_size
972
+ }
973
+
974
+ fn metadata(&self) -> PageMetadata {
975
+ self.metadata.clone()
976
+ }
977
+
978
+ fn links(&self) -> Vec<LinkInfo> {
979
+ self.links.clone()
980
+ }
981
+
982
+ fn images(&self) -> Vec<ImageInfo> {
983
+ self.images.clone()
984
+ }
985
+
986
+ fn feeds(&self) -> Vec<FeedInfo> {
987
+ self.feeds.clone()
988
+ }
989
+
990
+ fn json_ld(&self) -> Vec<JsonLdEntry> {
991
+ self.json_ld.clone()
992
+ }
993
+
994
+ fn is_allowed(&self) -> bool {
995
+ self.is_allowed
996
+ }
997
+
998
+ fn crawl_delay(&self) -> Option<u64> {
999
+ self.crawl_delay
1000
+ }
1001
+
1002
+ fn noindex_detected(&self) -> bool {
1003
+ self.noindex_detected
1004
+ }
1005
+
1006
+ fn nofollow_detected(&self) -> bool {
1007
+ self.nofollow_detected
1008
+ }
1009
+
1010
+ fn x_robots_tag(&self) -> Option<String> {
1011
+ self.x_robots_tag.clone()
1012
+ }
1013
+
1014
+ fn is_pdf(&self) -> bool {
1015
+ self.is_pdf
1016
+ }
1017
+
1018
+ fn was_skipped(&self) -> bool {
1019
+ self.was_skipped
1020
+ }
1021
+
1022
+ fn detected_charset(&self) -> Option<String> {
1023
+ self.detected_charset.clone()
1024
+ }
1025
+
1026
+ fn main_content_only(&self) -> bool {
1027
+ self.main_content_only
1028
+ }
1029
+
1030
+ fn auth_header_sent(&self) -> bool {
1031
+ self.auth_header_sent
1032
+ }
1033
+
1034
+ fn response_meta(&self) -> Option<ResponseMeta> {
1035
+ self.response_meta.clone()
1036
+ }
1037
+
1038
+ fn assets(&self) -> Vec<DownloadedAsset> {
1039
+ self.assets.clone()
1040
+ }
1041
+
1042
+ fn js_render_hint(&self) -> bool {
1043
+ self.js_render_hint
1044
+ }
1045
+
1046
+ fn browser_used(&self) -> bool {
1047
+ self.browser_used
1048
+ }
1049
+
1050
+ fn markdown(&self) -> Option<MarkdownResult> {
1051
+ self.markdown.clone()
1052
+ }
1053
+
1054
+ fn extracted_data(&self) -> Option<String> {
1055
+ self.extracted_data.clone()
1056
+ }
1057
+
1058
+ fn extraction_meta(&self) -> Option<ExtractionMeta> {
1059
+ self.extraction_meta.clone()
1060
+ }
1061
+
1062
+ fn screenshot(&self) -> Option<Vec<u8>> {
1063
+ self.screenshot.clone()
1064
+ }
1065
+
1066
+ fn downloaded_document(&self) -> Option<DownloadedDocument> {
1067
+ self.downloaded_document.clone()
1068
+ }
1069
+ }
1070
+
1071
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1072
+ #[magnus::wrap(class = "Kreuzcrawl::CrawlPageResult")]
1073
+ #[serde(default)]
1074
+ pub struct CrawlPageResult {
1075
+ pub url: String,
1076
+ pub normalized_url: String,
1077
+ pub status_code: u16,
1078
+ pub content_type: String,
1079
+ pub html: String,
1080
+ pub body_size: usize,
1081
+ pub metadata: PageMetadata,
1082
+ pub links: Vec<LinkInfo>,
1083
+ pub images: Vec<ImageInfo>,
1084
+ pub feeds: Vec<FeedInfo>,
1085
+ pub json_ld: Vec<JsonLdEntry>,
1086
+ pub depth: usize,
1087
+ pub stayed_on_domain: bool,
1088
+ pub was_skipped: bool,
1089
+ pub is_pdf: bool,
1090
+ pub detected_charset: Option<String>,
1091
+ pub markdown: Option<MarkdownResult>,
1092
+ pub extracted_data: Option<String>,
1093
+ pub extraction_meta: Option<ExtractionMeta>,
1094
+ pub downloaded_document: Option<DownloadedDocument>,
1095
+ }
1096
+
1097
+ unsafe impl IntoValueFromNative for CrawlPageResult {}
1098
+
1099
+ impl magnus::TryConvert for CrawlPageResult {
1100
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1101
+ let r: &CrawlPageResult = magnus::TryConvert::try_convert(val)?;
1102
+ Ok(r.clone())
1103
+ }
1104
+ }
1105
+ unsafe impl TryConvertOwned for CrawlPageResult {}
1106
+
1107
+ impl Default for CrawlPageResult {
1108
+ fn default() -> Self {
1109
+ Self {
1110
+ url: Default::default(),
1111
+ normalized_url: Default::default(),
1112
+ status_code: Default::default(),
1113
+ content_type: Default::default(),
1114
+ html: Default::default(),
1115
+ body_size: Default::default(),
1116
+ metadata: Default::default(),
1117
+ links: Default::default(),
1118
+ images: Default::default(),
1119
+ feeds: Default::default(),
1120
+ json_ld: Default::default(),
1121
+ depth: Default::default(),
1122
+ stayed_on_domain: Default::default(),
1123
+ was_skipped: Default::default(),
1124
+ is_pdf: Default::default(),
1125
+ detected_charset: Default::default(),
1126
+ markdown: Default::default(),
1127
+ extracted_data: Default::default(),
1128
+ extraction_meta: Default::default(),
1129
+ downloaded_document: Default::default(),
1130
+ }
1131
+ }
1132
+ }
1133
+
1134
+ impl CrawlPageResult {
1135
+ fn new(kwargs: magnus::RHash) -> Result<Self, magnus::Error> {
1136
+ let ruby = unsafe { magnus::Ruby::get_unchecked() };
1137
+ Ok(Self {
1138
+ url: kwargs
1139
+ .get(ruby.to_symbol("url"))
1140
+ .and_then(|v| String::try_convert(v).ok())
1141
+ .unwrap_or_default(),
1142
+ normalized_url: kwargs
1143
+ .get(ruby.to_symbol("normalized_url"))
1144
+ .and_then(|v| String::try_convert(v).ok())
1145
+ .unwrap_or_default(),
1146
+ status_code: kwargs
1147
+ .get(ruby.to_symbol("status_code"))
1148
+ .and_then(|v| u16::try_convert(v).ok())
1149
+ .unwrap_or_default(),
1150
+ content_type: kwargs
1151
+ .get(ruby.to_symbol("content_type"))
1152
+ .and_then(|v| String::try_convert(v).ok())
1153
+ .unwrap_or_default(),
1154
+ html: kwargs
1155
+ .get(ruby.to_symbol("html"))
1156
+ .and_then(|v| String::try_convert(v).ok())
1157
+ .unwrap_or_default(),
1158
+ body_size: kwargs
1159
+ .get(ruby.to_symbol("body_size"))
1160
+ .and_then(|v| usize::try_convert(v).ok())
1161
+ .unwrap_or_default(),
1162
+ metadata: kwargs
1163
+ .get(ruby.to_symbol("metadata"))
1164
+ .and_then(|v| PageMetadata::try_convert(v).ok())
1165
+ .unwrap_or_default(),
1166
+ links: kwargs
1167
+ .get(ruby.to_symbol("links"))
1168
+ .and_then(|v| <Vec<LinkInfo>>::try_convert(v).ok())
1169
+ .unwrap_or_default(),
1170
+ images: kwargs
1171
+ .get(ruby.to_symbol("images"))
1172
+ .and_then(|v| <Vec<ImageInfo>>::try_convert(v).ok())
1173
+ .unwrap_or_default(),
1174
+ feeds: kwargs
1175
+ .get(ruby.to_symbol("feeds"))
1176
+ .and_then(|v| <Vec<FeedInfo>>::try_convert(v).ok())
1177
+ .unwrap_or_default(),
1178
+ json_ld: kwargs
1179
+ .get(ruby.to_symbol("json_ld"))
1180
+ .and_then(|v| <Vec<JsonLdEntry>>::try_convert(v).ok())
1181
+ .unwrap_or_default(),
1182
+ depth: kwargs
1183
+ .get(ruby.to_symbol("depth"))
1184
+ .and_then(|v| usize::try_convert(v).ok())
1185
+ .unwrap_or_default(),
1186
+ stayed_on_domain: kwargs
1187
+ .get(ruby.to_symbol("stayed_on_domain"))
1188
+ .and_then(|v| bool::try_convert(v).ok())
1189
+ .unwrap_or_default(),
1190
+ was_skipped: kwargs
1191
+ .get(ruby.to_symbol("was_skipped"))
1192
+ .and_then(|v| bool::try_convert(v).ok())
1193
+ .unwrap_or_default(),
1194
+ is_pdf: kwargs
1195
+ .get(ruby.to_symbol("is_pdf"))
1196
+ .and_then(|v| bool::try_convert(v).ok())
1197
+ .unwrap_or_default(),
1198
+ detected_charset: kwargs
1199
+ .get(ruby.to_symbol("detected_charset"))
1200
+ .and_then(|v| String::try_convert(v).ok()),
1201
+ markdown: kwargs
1202
+ .get(ruby.to_symbol("markdown"))
1203
+ .and_then(|v| MarkdownResult::try_convert(v).ok()),
1204
+ extracted_data: kwargs
1205
+ .get(ruby.to_symbol("extracted_data"))
1206
+ .and_then(|v| String::try_convert(v).ok()),
1207
+ extraction_meta: kwargs
1208
+ .get(ruby.to_symbol("extraction_meta"))
1209
+ .and_then(|v| ExtractionMeta::try_convert(v).ok()),
1210
+ downloaded_document: kwargs
1211
+ .get(ruby.to_symbol("downloaded_document"))
1212
+ .and_then(|v| DownloadedDocument::try_convert(v).ok()),
1213
+ })
1214
+ }
1215
+
1216
+ fn url(&self) -> String {
1217
+ self.url.clone()
1218
+ }
1219
+
1220
+ fn normalized_url(&self) -> String {
1221
+ self.normalized_url.clone()
1222
+ }
1223
+
1224
+ fn status_code(&self) -> u16 {
1225
+ self.status_code
1226
+ }
1227
+
1228
+ fn content_type(&self) -> String {
1229
+ self.content_type.clone()
1230
+ }
1231
+
1232
+ fn html(&self) -> String {
1233
+ self.html.clone()
1234
+ }
1235
+
1236
+ fn body_size(&self) -> usize {
1237
+ self.body_size
1238
+ }
1239
+
1240
+ fn metadata(&self) -> PageMetadata {
1241
+ self.metadata.clone()
1242
+ }
1243
+
1244
+ fn links(&self) -> Vec<LinkInfo> {
1245
+ self.links.clone()
1246
+ }
1247
+
1248
+ fn images(&self) -> Vec<ImageInfo> {
1249
+ self.images.clone()
1250
+ }
1251
+
1252
+ fn feeds(&self) -> Vec<FeedInfo> {
1253
+ self.feeds.clone()
1254
+ }
1255
+
1256
+ fn json_ld(&self) -> Vec<JsonLdEntry> {
1257
+ self.json_ld.clone()
1258
+ }
1259
+
1260
+ fn depth(&self) -> usize {
1261
+ self.depth
1262
+ }
1263
+
1264
+ fn stayed_on_domain(&self) -> bool {
1265
+ self.stayed_on_domain
1266
+ }
1267
+
1268
+ fn was_skipped(&self) -> bool {
1269
+ self.was_skipped
1270
+ }
1271
+
1272
+ fn is_pdf(&self) -> bool {
1273
+ self.is_pdf
1274
+ }
1275
+
1276
+ fn detected_charset(&self) -> Option<String> {
1277
+ self.detected_charset.clone()
1278
+ }
1279
+
1280
+ fn markdown(&self) -> Option<MarkdownResult> {
1281
+ self.markdown.clone()
1282
+ }
1283
+
1284
+ fn extracted_data(&self) -> Option<String> {
1285
+ self.extracted_data.clone()
1286
+ }
1287
+
1288
+ fn extraction_meta(&self) -> Option<ExtractionMeta> {
1289
+ self.extraction_meta.clone()
1290
+ }
1291
+
1292
+ fn downloaded_document(&self) -> Option<DownloadedDocument> {
1293
+ self.downloaded_document.clone()
1294
+ }
1295
+ }
1296
+
1297
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1298
+ #[magnus::wrap(class = "Kreuzcrawl::CrawlResult")]
1299
+ #[serde(default)]
1300
+ pub struct CrawlResult {
1301
+ pub pages: Vec<CrawlPageResult>,
1302
+ pub final_url: String,
1303
+ pub redirect_count: usize,
1304
+ pub was_skipped: bool,
1305
+ pub error: Option<String>,
1306
+ pub cookies: Vec<CookieInfo>,
1307
+ pub normalized_urls: Vec<String>,
1308
+ }
1309
+
1310
+ unsafe impl IntoValueFromNative for CrawlResult {}
1311
+
1312
+ impl magnus::TryConvert for CrawlResult {
1313
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1314
+ let r: &CrawlResult = magnus::TryConvert::try_convert(val)?;
1315
+ Ok(r.clone())
1316
+ }
1317
+ }
1318
+ unsafe impl TryConvertOwned for CrawlResult {}
1319
+
1320
+ impl Default for CrawlResult {
1321
+ fn default() -> Self {
1322
+ Self {
1323
+ pages: Default::default(),
1324
+ final_url: Default::default(),
1325
+ redirect_count: Default::default(),
1326
+ was_skipped: Default::default(),
1327
+ error: Default::default(),
1328
+ cookies: Default::default(),
1329
+ normalized_urls: Default::default(),
1330
+ }
1331
+ }
1332
+ }
1333
+
1334
+ impl CrawlResult {
1335
+ fn new(
1336
+ pages: Option<Vec<CrawlPageResult>>,
1337
+ final_url: Option<String>,
1338
+ redirect_count: Option<usize>,
1339
+ was_skipped: Option<bool>,
1340
+ error: Option<String>,
1341
+ cookies: Option<Vec<CookieInfo>>,
1342
+ normalized_urls: Option<Vec<String>>,
1343
+ ) -> Self {
1344
+ Self {
1345
+ pages: pages.unwrap_or_default(),
1346
+ final_url: final_url.unwrap_or_default(),
1347
+ redirect_count: redirect_count.unwrap_or_default(),
1348
+ was_skipped: was_skipped.unwrap_or_default(),
1349
+ error,
1350
+ cookies: cookies.unwrap_or_default(),
1351
+ normalized_urls: normalized_urls.unwrap_or_default(),
1352
+ }
1353
+ }
1354
+
1355
+ fn pages(&self) -> Vec<CrawlPageResult> {
1356
+ self.pages.clone()
1357
+ }
1358
+
1359
+ fn final_url(&self) -> String {
1360
+ self.final_url.clone()
1361
+ }
1362
+
1363
+ fn redirect_count(&self) -> usize {
1364
+ self.redirect_count
1365
+ }
1366
+
1367
+ fn was_skipped(&self) -> bool {
1368
+ self.was_skipped
1369
+ }
1370
+
1371
+ fn error(&self) -> Option<String> {
1372
+ self.error.clone()
1373
+ }
1374
+
1375
+ fn cookies(&self) -> Vec<CookieInfo> {
1376
+ self.cookies.clone()
1377
+ }
1378
+
1379
+ fn normalized_urls(&self) -> Vec<String> {
1380
+ self.normalized_urls.clone()
1381
+ }
1382
+ }
1383
+
1384
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1385
+ #[magnus::wrap(class = "Kreuzcrawl::SitemapUrl")]
1386
+ #[serde(default)]
1387
+ pub struct SitemapUrl {
1388
+ pub url: String,
1389
+ pub lastmod: Option<String>,
1390
+ pub changefreq: Option<String>,
1391
+ pub priority: Option<String>,
1392
+ }
1393
+
1394
+ unsafe impl IntoValueFromNative for SitemapUrl {}
1395
+
1396
+ impl magnus::TryConvert for SitemapUrl {
1397
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1398
+ let r: &SitemapUrl = magnus::TryConvert::try_convert(val)?;
1399
+ Ok(r.clone())
1400
+ }
1401
+ }
1402
+ unsafe impl TryConvertOwned for SitemapUrl {}
1403
+
1404
+ impl Default for SitemapUrl {
1405
+ fn default() -> Self {
1406
+ Self {
1407
+ url: Default::default(),
1408
+ lastmod: Default::default(),
1409
+ changefreq: Default::default(),
1410
+ priority: Default::default(),
1411
+ }
1412
+ }
1413
+ }
1414
+
1415
+ impl SitemapUrl {
1416
+ fn new(url: Option<String>, lastmod: Option<String>, changefreq: Option<String>, priority: Option<String>) -> Self {
1417
+ Self {
1418
+ url: url.unwrap_or_default(),
1419
+ lastmod,
1420
+ changefreq,
1421
+ priority,
1422
+ }
1423
+ }
1424
+
1425
+ fn url(&self) -> String {
1426
+ self.url.clone()
1427
+ }
1428
+
1429
+ fn lastmod(&self) -> Option<String> {
1430
+ self.lastmod.clone()
1431
+ }
1432
+
1433
+ fn changefreq(&self) -> Option<String> {
1434
+ self.changefreq.clone()
1435
+ }
1436
+
1437
+ fn priority(&self) -> Option<String> {
1438
+ self.priority.clone()
1439
+ }
1440
+ }
1441
+
1442
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1443
+ #[magnus::wrap(class = "Kreuzcrawl::MapResult")]
1444
+ #[serde(default)]
1445
+ pub struct MapResult {
1446
+ pub urls: Vec<SitemapUrl>,
1447
+ }
1448
+
1449
+ unsafe impl IntoValueFromNative for MapResult {}
1450
+
1451
+ impl magnus::TryConvert for MapResult {
1452
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1453
+ let r: &MapResult = magnus::TryConvert::try_convert(val)?;
1454
+ Ok(r.clone())
1455
+ }
1456
+ }
1457
+ unsafe impl TryConvertOwned for MapResult {}
1458
+
1459
+ impl Default for MapResult {
1460
+ fn default() -> Self {
1461
+ Self {
1462
+ urls: Default::default(),
1463
+ }
1464
+ }
1465
+ }
1466
+
1467
+ impl MapResult {
1468
+ fn new(urls: Option<Vec<SitemapUrl>>) -> Self {
1469
+ Self {
1470
+ urls: urls.unwrap_or_default(),
1471
+ }
1472
+ }
1473
+
1474
+ fn urls(&self) -> Vec<SitemapUrl> {
1475
+ self.urls.clone()
1476
+ }
1477
+ }
1478
+
1479
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1480
+ #[magnus::wrap(class = "Kreuzcrawl::MarkdownResult")]
1481
+ #[serde(default)]
1482
+ pub struct MarkdownResult {
1483
+ pub content: String,
1484
+ pub document_structure: Option<String>,
1485
+ pub tables: Vec<String>,
1486
+ pub warnings: Vec<String>,
1487
+ pub citations: Option<CitationResult>,
1488
+ pub fit_content: Option<String>,
1489
+ }
1490
+
1491
+ unsafe impl IntoValueFromNative for MarkdownResult {}
1492
+
1493
+ impl magnus::TryConvert for MarkdownResult {
1494
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1495
+ let r: &MarkdownResult = magnus::TryConvert::try_convert(val)?;
1496
+ Ok(r.clone())
1497
+ }
1498
+ }
1499
+ unsafe impl TryConvertOwned for MarkdownResult {}
1500
+
1501
+ impl Default for MarkdownResult {
1502
+ fn default() -> Self {
1503
+ Self {
1504
+ content: Default::default(),
1505
+ document_structure: Default::default(),
1506
+ tables: Default::default(),
1507
+ warnings: Default::default(),
1508
+ citations: Default::default(),
1509
+ fit_content: Default::default(),
1510
+ }
1511
+ }
1512
+ }
1513
+
1514
+ impl MarkdownResult {
1515
+ fn new(
1516
+ content: Option<String>,
1517
+ document_structure: Option<String>,
1518
+ tables: Option<Vec<String>>,
1519
+ warnings: Option<Vec<String>>,
1520
+ citations: Option<CitationResult>,
1521
+ fit_content: Option<String>,
1522
+ ) -> Self {
1523
+ Self {
1524
+ content: content.unwrap_or_default(),
1525
+ document_structure,
1526
+ tables: tables.unwrap_or_default(),
1527
+ warnings: warnings.unwrap_or_default(),
1528
+ citations,
1529
+ fit_content,
1530
+ }
1531
+ }
1532
+
1533
+ fn content(&self) -> String {
1534
+ self.content.clone()
1535
+ }
1536
+
1537
+ fn document_structure(&self) -> Option<String> {
1538
+ self.document_structure.clone()
1539
+ }
1540
+
1541
+ fn tables(&self) -> Vec<String> {
1542
+ self.tables.clone()
1543
+ }
1544
+
1545
+ fn warnings(&self) -> Vec<String> {
1546
+ self.warnings.clone()
1547
+ }
1548
+
1549
+ fn citations(&self) -> Option<CitationResult> {
1550
+ self.citations.clone()
1551
+ }
1552
+
1553
+ fn fit_content(&self) -> Option<String> {
1554
+ self.fit_content.clone()
1555
+ }
1556
+ }
1557
+
1558
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1559
+ #[magnus::wrap(class = "Kreuzcrawl::CachedPage")]
1560
+ #[serde(default)]
1561
+ pub struct CachedPage {
1562
+ pub url: String,
1563
+ pub status_code: u16,
1564
+ pub content_type: String,
1565
+ pub body: String,
1566
+ pub etag: Option<String>,
1567
+ pub last_modified: Option<String>,
1568
+ pub cached_at: u64,
1569
+ }
1570
+
1571
+ unsafe impl IntoValueFromNative for CachedPage {}
1572
+
1573
+ impl magnus::TryConvert for CachedPage {
1574
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1575
+ let r: &CachedPage = magnus::TryConvert::try_convert(val)?;
1576
+ Ok(r.clone())
1577
+ }
1578
+ }
1579
+ unsafe impl TryConvertOwned for CachedPage {}
1580
+
1581
+ impl Default for CachedPage {
1582
+ fn default() -> Self {
1583
+ Self {
1584
+ url: Default::default(),
1585
+ status_code: Default::default(),
1586
+ content_type: Default::default(),
1587
+ body: Default::default(),
1588
+ etag: Default::default(),
1589
+ last_modified: Default::default(),
1590
+ cached_at: Default::default(),
1591
+ }
1592
+ }
1593
+ }
1594
+
1595
+ impl CachedPage {
1596
+ fn new(
1597
+ url: Option<String>,
1598
+ status_code: Option<u16>,
1599
+ content_type: Option<String>,
1600
+ body: Option<String>,
1601
+ etag: Option<String>,
1602
+ last_modified: Option<String>,
1603
+ cached_at: Option<u64>,
1604
+ ) -> Self {
1605
+ Self {
1606
+ url: url.unwrap_or_default(),
1607
+ status_code: status_code.unwrap_or_default(),
1608
+ content_type: content_type.unwrap_or_default(),
1609
+ body: body.unwrap_or_default(),
1610
+ etag,
1611
+ last_modified,
1612
+ cached_at: cached_at.unwrap_or_default(),
1613
+ }
1614
+ }
1615
+
1616
+ fn url(&self) -> String {
1617
+ self.url.clone()
1618
+ }
1619
+
1620
+ fn status_code(&self) -> u16 {
1621
+ self.status_code
1622
+ }
1623
+
1624
+ fn content_type(&self) -> String {
1625
+ self.content_type.clone()
1626
+ }
1627
+
1628
+ fn body(&self) -> String {
1629
+ self.body.clone()
1630
+ }
1631
+
1632
+ fn etag(&self) -> Option<String> {
1633
+ self.etag.clone()
1634
+ }
1635
+
1636
+ fn last_modified(&self) -> Option<String> {
1637
+ self.last_modified.clone()
1638
+ }
1639
+
1640
+ fn cached_at(&self) -> u64 {
1641
+ self.cached_at
1642
+ }
1643
+ }
1644
+
1645
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
1646
+ #[magnus::wrap(class = "Kreuzcrawl::LinkInfo")]
1647
+ #[serde(default)]
1648
+ pub struct LinkInfo {
1649
+ pub url: String,
1650
+ pub text: String,
1651
+ pub link_type: LinkType,
1652
+ pub rel: Option<String>,
1653
+ pub nofollow: bool,
1654
+ }
1655
+
1656
+ unsafe impl IntoValueFromNative for LinkInfo {}
1657
+
1658
+ impl magnus::TryConvert for LinkInfo {
1659
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1660
+ let r: &LinkInfo = magnus::TryConvert::try_convert(val)?;
1661
+ Ok(r.clone())
1662
+ }
1663
+ }
1664
+ unsafe impl TryConvertOwned for LinkInfo {}
1665
+
1666
+ impl LinkInfo {
1667
+ fn new(
1668
+ url: Option<String>,
1669
+ text: Option<String>,
1670
+ link_type: Option<LinkType>,
1671
+ rel: Option<String>,
1672
+ nofollow: Option<bool>,
1673
+ ) -> Self {
1674
+ Self {
1675
+ url: url.unwrap_or_default(),
1676
+ text: text.unwrap_or_default(),
1677
+ link_type: link_type.unwrap_or_default(),
1678
+ rel,
1679
+ nofollow: nofollow.unwrap_or_default(),
1680
+ }
1681
+ }
1682
+
1683
+ fn url(&self) -> String {
1684
+ self.url.clone()
1685
+ }
1686
+
1687
+ fn text(&self) -> String {
1688
+ self.text.clone()
1689
+ }
1690
+
1691
+ fn link_type(&self) -> LinkType {
1692
+ self.link_type.clone()
1693
+ }
1694
+
1695
+ fn rel(&self) -> Option<String> {
1696
+ self.rel.clone()
1697
+ }
1698
+
1699
+ fn nofollow(&self) -> bool {
1700
+ self.nofollow
1701
+ }
1702
+ }
1703
+
1704
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
1705
+ #[magnus::wrap(class = "Kreuzcrawl::ImageInfo")]
1706
+ #[serde(default)]
1707
+ pub struct ImageInfo {
1708
+ pub url: String,
1709
+ pub alt: Option<String>,
1710
+ pub width: Option<u32>,
1711
+ pub height: Option<u32>,
1712
+ pub source: ImageSource,
1713
+ }
1714
+
1715
+ unsafe impl IntoValueFromNative for ImageInfo {}
1716
+
1717
+ impl magnus::TryConvert for ImageInfo {
1718
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1719
+ let r: &ImageInfo = magnus::TryConvert::try_convert(val)?;
1720
+ Ok(r.clone())
1721
+ }
1722
+ }
1723
+ unsafe impl TryConvertOwned for ImageInfo {}
1724
+
1725
+ impl ImageInfo {
1726
+ fn new(
1727
+ url: Option<String>,
1728
+ alt: Option<String>,
1729
+ width: Option<u32>,
1730
+ height: Option<u32>,
1731
+ source: Option<ImageSource>,
1732
+ ) -> Self {
1733
+ Self {
1734
+ url: url.unwrap_or_default(),
1735
+ alt,
1736
+ width,
1737
+ height,
1738
+ source: source.unwrap_or_default(),
1739
+ }
1740
+ }
1741
+
1742
+ fn url(&self) -> String {
1743
+ self.url.clone()
1744
+ }
1745
+
1746
+ fn alt(&self) -> Option<String> {
1747
+ self.alt.clone()
1748
+ }
1749
+
1750
+ fn width(&self) -> Option<u32> {
1751
+ self.width
1752
+ }
1753
+
1754
+ fn height(&self) -> Option<u32> {
1755
+ self.height
1756
+ }
1757
+
1758
+ fn source(&self) -> ImageSource {
1759
+ self.source.clone()
1760
+ }
1761
+ }
1762
+
1763
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
1764
+ #[magnus::wrap(class = "Kreuzcrawl::FeedInfo")]
1765
+ #[serde(default)]
1766
+ pub struct FeedInfo {
1767
+ pub url: String,
1768
+ pub title: Option<String>,
1769
+ pub feed_type: FeedType,
1770
+ }
1771
+
1772
+ unsafe impl IntoValueFromNative for FeedInfo {}
1773
+
1774
+ impl magnus::TryConvert for FeedInfo {
1775
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1776
+ let r: &FeedInfo = magnus::TryConvert::try_convert(val)?;
1777
+ Ok(r.clone())
1778
+ }
1779
+ }
1780
+ unsafe impl TryConvertOwned for FeedInfo {}
1781
+
1782
+ impl FeedInfo {
1783
+ fn new(url: Option<String>, title: Option<String>, feed_type: Option<FeedType>) -> Self {
1784
+ Self {
1785
+ url: url.unwrap_or_default(),
1786
+ title,
1787
+ feed_type: feed_type.unwrap_or_default(),
1788
+ }
1789
+ }
1790
+
1791
+ fn url(&self) -> String {
1792
+ self.url.clone()
1793
+ }
1794
+
1795
+ fn title(&self) -> Option<String> {
1796
+ self.title.clone()
1797
+ }
1798
+
1799
+ fn feed_type(&self) -> FeedType {
1800
+ self.feed_type.clone()
1801
+ }
1802
+ }
1803
+
1804
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1805
+ #[magnus::wrap(class = "Kreuzcrawl::JsonLdEntry")]
1806
+ #[serde(default)]
1807
+ pub struct JsonLdEntry {
1808
+ pub schema_type: String,
1809
+ pub name: Option<String>,
1810
+ pub raw: String,
1811
+ }
1812
+
1813
+ unsafe impl IntoValueFromNative for JsonLdEntry {}
1814
+
1815
+ impl magnus::TryConvert for JsonLdEntry {
1816
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1817
+ let r: &JsonLdEntry = magnus::TryConvert::try_convert(val)?;
1818
+ Ok(r.clone())
1819
+ }
1820
+ }
1821
+ unsafe impl TryConvertOwned for JsonLdEntry {}
1822
+
1823
+ impl Default for JsonLdEntry {
1824
+ fn default() -> Self {
1825
+ Self {
1826
+ schema_type: Default::default(),
1827
+ name: Default::default(),
1828
+ raw: Default::default(),
1829
+ }
1830
+ }
1831
+ }
1832
+
1833
+ impl JsonLdEntry {
1834
+ fn new(schema_type: Option<String>, name: Option<String>, raw: Option<String>) -> Self {
1835
+ Self {
1836
+ schema_type: schema_type.unwrap_or_default(),
1837
+ name,
1838
+ raw: raw.unwrap_or_default(),
1839
+ }
1840
+ }
1841
+
1842
+ fn schema_type(&self) -> String {
1843
+ self.schema_type.clone()
1844
+ }
1845
+
1846
+ fn name(&self) -> Option<String> {
1847
+ self.name.clone()
1848
+ }
1849
+
1850
+ fn raw(&self) -> String {
1851
+ self.raw.clone()
1852
+ }
1853
+ }
1854
+
1855
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1856
+ #[magnus::wrap(class = "Kreuzcrawl::CookieInfo")]
1857
+ #[serde(default)]
1858
+ pub struct CookieInfo {
1859
+ pub name: String,
1860
+ pub value: String,
1861
+ pub domain: Option<String>,
1862
+ pub path: Option<String>,
1863
+ }
1864
+
1865
+ unsafe impl IntoValueFromNative for CookieInfo {}
1866
+
1867
+ impl magnus::TryConvert for CookieInfo {
1868
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1869
+ let r: &CookieInfo = magnus::TryConvert::try_convert(val)?;
1870
+ Ok(r.clone())
1871
+ }
1872
+ }
1873
+ unsafe impl TryConvertOwned for CookieInfo {}
1874
+
1875
+ impl Default for CookieInfo {
1876
+ fn default() -> Self {
1877
+ Self {
1878
+ name: Default::default(),
1879
+ value: Default::default(),
1880
+ domain: Default::default(),
1881
+ path: Default::default(),
1882
+ }
1883
+ }
1884
+ }
1885
+
1886
+ impl CookieInfo {
1887
+ fn new(name: Option<String>, value: Option<String>, domain: Option<String>, path: Option<String>) -> Self {
1888
+ Self {
1889
+ name: name.unwrap_or_default(),
1890
+ value: value.unwrap_or_default(),
1891
+ domain,
1892
+ path,
1893
+ }
1894
+ }
1895
+
1896
+ fn name(&self) -> String {
1897
+ self.name.clone()
1898
+ }
1899
+
1900
+ fn value(&self) -> String {
1901
+ self.value.clone()
1902
+ }
1903
+
1904
+ fn domain(&self) -> Option<String> {
1905
+ self.domain.clone()
1906
+ }
1907
+
1908
+ fn path(&self) -> Option<String> {
1909
+ self.path.clone()
1910
+ }
1911
+ }
1912
+
1913
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize, Default)]
1914
+ #[magnus::wrap(class = "Kreuzcrawl::DownloadedAsset")]
1915
+ #[serde(default)]
1916
+ pub struct DownloadedAsset {
1917
+ pub url: String,
1918
+ pub content_hash: String,
1919
+ pub mime_type: Option<String>,
1920
+ pub size: usize,
1921
+ pub asset_category: AssetCategory,
1922
+ pub html_tag: Option<String>,
1923
+ }
1924
+
1925
+ unsafe impl IntoValueFromNative for DownloadedAsset {}
1926
+
1927
+ impl magnus::TryConvert for DownloadedAsset {
1928
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1929
+ let r: &DownloadedAsset = magnus::TryConvert::try_convert(val)?;
1930
+ Ok(r.clone())
1931
+ }
1932
+ }
1933
+ unsafe impl TryConvertOwned for DownloadedAsset {}
1934
+
1935
+ impl DownloadedAsset {
1936
+ fn new(
1937
+ url: Option<String>,
1938
+ content_hash: Option<String>,
1939
+ mime_type: Option<String>,
1940
+ size: Option<usize>,
1941
+ asset_category: Option<AssetCategory>,
1942
+ html_tag: Option<String>,
1943
+ ) -> Self {
1944
+ Self {
1945
+ url: url.unwrap_or_default(),
1946
+ content_hash: content_hash.unwrap_or_default(),
1947
+ mime_type,
1948
+ size: size.unwrap_or_default(),
1949
+ asset_category: asset_category.unwrap_or_default(),
1950
+ html_tag,
1951
+ }
1952
+ }
1953
+
1954
+ fn url(&self) -> String {
1955
+ self.url.clone()
1956
+ }
1957
+
1958
+ fn content_hash(&self) -> String {
1959
+ self.content_hash.clone()
1960
+ }
1961
+
1962
+ fn mime_type(&self) -> Option<String> {
1963
+ self.mime_type.clone()
1964
+ }
1965
+
1966
+ fn size(&self) -> usize {
1967
+ self.size
1968
+ }
1969
+
1970
+ fn asset_category(&self) -> AssetCategory {
1971
+ self.asset_category.clone()
1972
+ }
1973
+
1974
+ fn html_tag(&self) -> Option<String> {
1975
+ self.html_tag.clone()
1976
+ }
1977
+ }
1978
+
1979
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
1980
+ #[magnus::wrap(class = "Kreuzcrawl::ArticleMetadata")]
1981
+ #[serde(default)]
1982
+ pub struct ArticleMetadata {
1983
+ pub published_time: Option<String>,
1984
+ pub modified_time: Option<String>,
1985
+ pub author: Option<String>,
1986
+ pub section: Option<String>,
1987
+ pub tags: Vec<String>,
1988
+ }
1989
+
1990
+ unsafe impl IntoValueFromNative for ArticleMetadata {}
1991
+
1992
+ impl magnus::TryConvert for ArticleMetadata {
1993
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
1994
+ let r: &ArticleMetadata = magnus::TryConvert::try_convert(val)?;
1995
+ Ok(r.clone())
1996
+ }
1997
+ }
1998
+ unsafe impl TryConvertOwned for ArticleMetadata {}
1999
+
2000
+ impl Default for ArticleMetadata {
2001
+ fn default() -> Self {
2002
+ Self {
2003
+ published_time: Default::default(),
2004
+ modified_time: Default::default(),
2005
+ author: Default::default(),
2006
+ section: Default::default(),
2007
+ tags: Default::default(),
2008
+ }
2009
+ }
2010
+ }
2011
+
2012
+ impl ArticleMetadata {
2013
+ fn new(
2014
+ published_time: Option<String>,
2015
+ modified_time: Option<String>,
2016
+ author: Option<String>,
2017
+ section: Option<String>,
2018
+ tags: Option<Vec<String>>,
2019
+ ) -> Self {
2020
+ Self {
2021
+ published_time,
2022
+ modified_time,
2023
+ author,
2024
+ section,
2025
+ tags: tags.unwrap_or_default(),
2026
+ }
2027
+ }
2028
+
2029
+ fn published_time(&self) -> Option<String> {
2030
+ self.published_time.clone()
2031
+ }
2032
+
2033
+ fn modified_time(&self) -> Option<String> {
2034
+ self.modified_time.clone()
2035
+ }
2036
+
2037
+ fn author(&self) -> Option<String> {
2038
+ self.author.clone()
2039
+ }
2040
+
2041
+ fn section(&self) -> Option<String> {
2042
+ self.section.clone()
2043
+ }
2044
+
2045
+ fn tags(&self) -> Vec<String> {
2046
+ self.tags.clone()
2047
+ }
2048
+ }
2049
+
2050
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2051
+ #[magnus::wrap(class = "Kreuzcrawl::HreflangEntry")]
2052
+ #[serde(default)]
2053
+ pub struct HreflangEntry {
2054
+ pub lang: String,
2055
+ pub url: String,
2056
+ }
2057
+
2058
+ unsafe impl IntoValueFromNative for HreflangEntry {}
2059
+
2060
+ impl magnus::TryConvert for HreflangEntry {
2061
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2062
+ let r: &HreflangEntry = magnus::TryConvert::try_convert(val)?;
2063
+ Ok(r.clone())
2064
+ }
2065
+ }
2066
+ unsafe impl TryConvertOwned for HreflangEntry {}
2067
+
2068
+ impl Default for HreflangEntry {
2069
+ fn default() -> Self {
2070
+ Self {
2071
+ lang: Default::default(),
2072
+ url: Default::default(),
2073
+ }
2074
+ }
2075
+ }
2076
+
2077
+ impl HreflangEntry {
2078
+ fn new(lang: Option<String>, url: Option<String>) -> Self {
2079
+ Self {
2080
+ lang: lang.unwrap_or_default(),
2081
+ url: url.unwrap_or_default(),
2082
+ }
2083
+ }
2084
+
2085
+ fn lang(&self) -> String {
2086
+ self.lang.clone()
2087
+ }
2088
+
2089
+ fn url(&self) -> String {
2090
+ self.url.clone()
2091
+ }
2092
+ }
2093
+
2094
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2095
+ #[magnus::wrap(class = "Kreuzcrawl::FaviconInfo")]
2096
+ #[serde(default)]
2097
+ pub struct FaviconInfo {
2098
+ pub url: String,
2099
+ pub rel: String,
2100
+ pub sizes: Option<String>,
2101
+ pub mime_type: Option<String>,
2102
+ }
2103
+
2104
+ unsafe impl IntoValueFromNative for FaviconInfo {}
2105
+
2106
+ impl magnus::TryConvert for FaviconInfo {
2107
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2108
+ let r: &FaviconInfo = magnus::TryConvert::try_convert(val)?;
2109
+ Ok(r.clone())
2110
+ }
2111
+ }
2112
+ unsafe impl TryConvertOwned for FaviconInfo {}
2113
+
2114
+ impl Default for FaviconInfo {
2115
+ fn default() -> Self {
2116
+ Self {
2117
+ url: Default::default(),
2118
+ rel: Default::default(),
2119
+ sizes: Default::default(),
2120
+ mime_type: Default::default(),
2121
+ }
2122
+ }
2123
+ }
2124
+
2125
+ impl FaviconInfo {
2126
+ fn new(url: Option<String>, rel: Option<String>, sizes: Option<String>, mime_type: Option<String>) -> Self {
2127
+ Self {
2128
+ url: url.unwrap_or_default(),
2129
+ rel: rel.unwrap_or_default(),
2130
+ sizes,
2131
+ mime_type,
2132
+ }
2133
+ }
2134
+
2135
+ fn url(&self) -> String {
2136
+ self.url.clone()
2137
+ }
2138
+
2139
+ fn rel(&self) -> String {
2140
+ self.rel.clone()
2141
+ }
2142
+
2143
+ fn sizes(&self) -> Option<String> {
2144
+ self.sizes.clone()
2145
+ }
2146
+
2147
+ fn mime_type(&self) -> Option<String> {
2148
+ self.mime_type.clone()
2149
+ }
2150
+ }
2151
+
2152
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2153
+ #[magnus::wrap(class = "Kreuzcrawl::HeadingInfo")]
2154
+ #[serde(default)]
2155
+ pub struct HeadingInfo {
2156
+ pub level: u8,
2157
+ pub text: String,
2158
+ }
2159
+
2160
+ unsafe impl IntoValueFromNative for HeadingInfo {}
2161
+
2162
+ impl magnus::TryConvert for HeadingInfo {
2163
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2164
+ let r: &HeadingInfo = magnus::TryConvert::try_convert(val)?;
2165
+ Ok(r.clone())
2166
+ }
2167
+ }
2168
+ unsafe impl TryConvertOwned for HeadingInfo {}
2169
+
2170
+ impl Default for HeadingInfo {
2171
+ fn default() -> Self {
2172
+ Self {
2173
+ level: Default::default(),
2174
+ text: Default::default(),
2175
+ }
2176
+ }
2177
+ }
2178
+
2179
+ impl HeadingInfo {
2180
+ fn new(level: Option<u8>, text: Option<String>) -> Self {
2181
+ Self {
2182
+ level: level.unwrap_or_default(),
2183
+ text: text.unwrap_or_default(),
2184
+ }
2185
+ }
2186
+
2187
+ fn level(&self) -> u8 {
2188
+ self.level
2189
+ }
2190
+
2191
+ fn text(&self) -> String {
2192
+ self.text.clone()
2193
+ }
2194
+ }
2195
+
2196
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2197
+ #[magnus::wrap(class = "Kreuzcrawl::ResponseMeta")]
2198
+ #[serde(default)]
2199
+ pub struct ResponseMeta {
2200
+ pub etag: Option<String>,
2201
+ pub last_modified: Option<String>,
2202
+ pub cache_control: Option<String>,
2203
+ pub server: Option<String>,
2204
+ pub x_powered_by: Option<String>,
2205
+ pub content_language: Option<String>,
2206
+ pub content_encoding: Option<String>,
2207
+ }
2208
+
2209
+ unsafe impl IntoValueFromNative for ResponseMeta {}
2210
+
2211
+ impl magnus::TryConvert for ResponseMeta {
2212
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2213
+ let r: &ResponseMeta = magnus::TryConvert::try_convert(val)?;
2214
+ Ok(r.clone())
2215
+ }
2216
+ }
2217
+ unsafe impl TryConvertOwned for ResponseMeta {}
2218
+
2219
+ impl Default for ResponseMeta {
2220
+ fn default() -> Self {
2221
+ Self {
2222
+ etag: Default::default(),
2223
+ last_modified: Default::default(),
2224
+ cache_control: Default::default(),
2225
+ server: Default::default(),
2226
+ x_powered_by: Default::default(),
2227
+ content_language: Default::default(),
2228
+ content_encoding: Default::default(),
2229
+ }
2230
+ }
2231
+ }
2232
+
2233
+ impl ResponseMeta {
2234
+ fn new(
2235
+ etag: Option<String>,
2236
+ last_modified: Option<String>,
2237
+ cache_control: Option<String>,
2238
+ server: Option<String>,
2239
+ x_powered_by: Option<String>,
2240
+ content_language: Option<String>,
2241
+ content_encoding: Option<String>,
2242
+ ) -> Self {
2243
+ Self {
2244
+ etag,
2245
+ last_modified,
2246
+ cache_control,
2247
+ server,
2248
+ x_powered_by,
2249
+ content_language,
2250
+ content_encoding,
2251
+ }
2252
+ }
2253
+
2254
+ fn etag(&self) -> Option<String> {
2255
+ self.etag.clone()
2256
+ }
2257
+
2258
+ fn last_modified(&self) -> Option<String> {
2259
+ self.last_modified.clone()
2260
+ }
2261
+
2262
+ fn cache_control(&self) -> Option<String> {
2263
+ self.cache_control.clone()
2264
+ }
2265
+
2266
+ fn server(&self) -> Option<String> {
2267
+ self.server.clone()
2268
+ }
2269
+
2270
+ fn x_powered_by(&self) -> Option<String> {
2271
+ self.x_powered_by.clone()
2272
+ }
2273
+
2274
+ fn content_language(&self) -> Option<String> {
2275
+ self.content_language.clone()
2276
+ }
2277
+
2278
+ fn content_encoding(&self) -> Option<String> {
2279
+ self.content_encoding.clone()
2280
+ }
2281
+ }
2282
+
2283
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2284
+ #[magnus::wrap(class = "Kreuzcrawl::PageMetadata")]
2285
+ #[serde(default)]
2286
+ pub struct PageMetadata {
2287
+ pub title: Option<String>,
2288
+ pub description: Option<String>,
2289
+ pub canonical_url: Option<String>,
2290
+ pub keywords: Option<String>,
2291
+ pub author: Option<String>,
2292
+ pub viewport: Option<String>,
2293
+ pub theme_color: Option<String>,
2294
+ pub generator: Option<String>,
2295
+ pub robots: Option<String>,
2296
+ pub html_lang: Option<String>,
2297
+ pub html_dir: Option<String>,
2298
+ pub og_title: Option<String>,
2299
+ pub og_type: Option<String>,
2300
+ pub og_image: Option<String>,
2301
+ pub og_description: Option<String>,
2302
+ pub og_url: Option<String>,
2303
+ pub og_site_name: Option<String>,
2304
+ pub og_locale: Option<String>,
2305
+ pub og_video: Option<String>,
2306
+ pub og_audio: Option<String>,
2307
+ pub og_locale_alternates: Option<Vec<String>>,
2308
+ pub twitter_card: Option<String>,
2309
+ pub twitter_title: Option<String>,
2310
+ pub twitter_description: Option<String>,
2311
+ pub twitter_image: Option<String>,
2312
+ pub twitter_site: Option<String>,
2313
+ pub twitter_creator: Option<String>,
2314
+ pub dc_title: Option<String>,
2315
+ pub dc_creator: Option<String>,
2316
+ pub dc_subject: Option<String>,
2317
+ pub dc_description: Option<String>,
2318
+ pub dc_publisher: Option<String>,
2319
+ pub dc_date: Option<String>,
2320
+ pub dc_type: Option<String>,
2321
+ pub dc_format: Option<String>,
2322
+ pub dc_identifier: Option<String>,
2323
+ pub dc_language: Option<String>,
2324
+ pub dc_rights: Option<String>,
2325
+ pub article: Option<ArticleMetadata>,
2326
+ pub hreflangs: Option<Vec<HreflangEntry>>,
2327
+ pub favicons: Option<Vec<FaviconInfo>>,
2328
+ pub headings: Option<Vec<HeadingInfo>>,
2329
+ pub word_count: Option<usize>,
2330
+ }
2331
+
2332
+ unsafe impl IntoValueFromNative for PageMetadata {}
2333
+
2334
+ impl magnus::TryConvert for PageMetadata {
2335
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2336
+ let r: &PageMetadata = magnus::TryConvert::try_convert(val)?;
2337
+ Ok(r.clone())
2338
+ }
2339
+ }
2340
+ unsafe impl TryConvertOwned for PageMetadata {}
2341
+
2342
+ impl Default for PageMetadata {
2343
+ fn default() -> Self {
2344
+ Self {
2345
+ title: Default::default(),
2346
+ description: Default::default(),
2347
+ canonical_url: Default::default(),
2348
+ keywords: Default::default(),
2349
+ author: Default::default(),
2350
+ viewport: Default::default(),
2351
+ theme_color: Default::default(),
2352
+ generator: Default::default(),
2353
+ robots: Default::default(),
2354
+ html_lang: Default::default(),
2355
+ html_dir: Default::default(),
2356
+ og_title: Default::default(),
2357
+ og_type: Default::default(),
2358
+ og_image: Default::default(),
2359
+ og_description: Default::default(),
2360
+ og_url: Default::default(),
2361
+ og_site_name: Default::default(),
2362
+ og_locale: Default::default(),
2363
+ og_video: Default::default(),
2364
+ og_audio: Default::default(),
2365
+ og_locale_alternates: Default::default(),
2366
+ twitter_card: Default::default(),
2367
+ twitter_title: Default::default(),
2368
+ twitter_description: Default::default(),
2369
+ twitter_image: Default::default(),
2370
+ twitter_site: Default::default(),
2371
+ twitter_creator: Default::default(),
2372
+ dc_title: Default::default(),
2373
+ dc_creator: Default::default(),
2374
+ dc_subject: Default::default(),
2375
+ dc_description: Default::default(),
2376
+ dc_publisher: Default::default(),
2377
+ dc_date: Default::default(),
2378
+ dc_type: Default::default(),
2379
+ dc_format: Default::default(),
2380
+ dc_identifier: Default::default(),
2381
+ dc_language: Default::default(),
2382
+ dc_rights: Default::default(),
2383
+ article: Default::default(),
2384
+ hreflangs: Default::default(),
2385
+ favicons: Default::default(),
2386
+ headings: Default::default(),
2387
+ word_count: Default::default(),
2388
+ }
2389
+ }
2390
+ }
2391
+
2392
+ impl PageMetadata {
2393
+ fn new(kwargs: magnus::RHash) -> Result<Self, magnus::Error> {
2394
+ let ruby = unsafe { magnus::Ruby::get_unchecked() };
2395
+ Ok(Self {
2396
+ title: kwargs
2397
+ .get(ruby.to_symbol("title"))
2398
+ .and_then(|v| String::try_convert(v).ok()),
2399
+ description: kwargs
2400
+ .get(ruby.to_symbol("description"))
2401
+ .and_then(|v| String::try_convert(v).ok()),
2402
+ canonical_url: kwargs
2403
+ .get(ruby.to_symbol("canonical_url"))
2404
+ .and_then(|v| String::try_convert(v).ok()),
2405
+ keywords: kwargs
2406
+ .get(ruby.to_symbol("keywords"))
2407
+ .and_then(|v| String::try_convert(v).ok()),
2408
+ author: kwargs
2409
+ .get(ruby.to_symbol("author"))
2410
+ .and_then(|v| String::try_convert(v).ok()),
2411
+ viewport: kwargs
2412
+ .get(ruby.to_symbol("viewport"))
2413
+ .and_then(|v| String::try_convert(v).ok()),
2414
+ theme_color: kwargs
2415
+ .get(ruby.to_symbol("theme_color"))
2416
+ .and_then(|v| String::try_convert(v).ok()),
2417
+ generator: kwargs
2418
+ .get(ruby.to_symbol("generator"))
2419
+ .and_then(|v| String::try_convert(v).ok()),
2420
+ robots: kwargs
2421
+ .get(ruby.to_symbol("robots"))
2422
+ .and_then(|v| String::try_convert(v).ok()),
2423
+ html_lang: kwargs
2424
+ .get(ruby.to_symbol("html_lang"))
2425
+ .and_then(|v| String::try_convert(v).ok()),
2426
+ html_dir: kwargs
2427
+ .get(ruby.to_symbol("html_dir"))
2428
+ .and_then(|v| String::try_convert(v).ok()),
2429
+ og_title: kwargs
2430
+ .get(ruby.to_symbol("og_title"))
2431
+ .and_then(|v| String::try_convert(v).ok()),
2432
+ og_type: kwargs
2433
+ .get(ruby.to_symbol("og_type"))
2434
+ .and_then(|v| String::try_convert(v).ok()),
2435
+ og_image: kwargs
2436
+ .get(ruby.to_symbol("og_image"))
2437
+ .and_then(|v| String::try_convert(v).ok()),
2438
+ og_description: kwargs
2439
+ .get(ruby.to_symbol("og_description"))
2440
+ .and_then(|v| String::try_convert(v).ok()),
2441
+ og_url: kwargs
2442
+ .get(ruby.to_symbol("og_url"))
2443
+ .and_then(|v| String::try_convert(v).ok()),
2444
+ og_site_name: kwargs
2445
+ .get(ruby.to_symbol("og_site_name"))
2446
+ .and_then(|v| String::try_convert(v).ok()),
2447
+ og_locale: kwargs
2448
+ .get(ruby.to_symbol("og_locale"))
2449
+ .and_then(|v| String::try_convert(v).ok()),
2450
+ og_video: kwargs
2451
+ .get(ruby.to_symbol("og_video"))
2452
+ .and_then(|v| String::try_convert(v).ok()),
2453
+ og_audio: kwargs
2454
+ .get(ruby.to_symbol("og_audio"))
2455
+ .and_then(|v| String::try_convert(v).ok()),
2456
+ og_locale_alternates: kwargs
2457
+ .get(ruby.to_symbol("og_locale_alternates"))
2458
+ .and_then(|v| <Vec<String>>::try_convert(v).ok()),
2459
+ twitter_card: kwargs
2460
+ .get(ruby.to_symbol("twitter_card"))
2461
+ .and_then(|v| String::try_convert(v).ok()),
2462
+ twitter_title: kwargs
2463
+ .get(ruby.to_symbol("twitter_title"))
2464
+ .and_then(|v| String::try_convert(v).ok()),
2465
+ twitter_description: kwargs
2466
+ .get(ruby.to_symbol("twitter_description"))
2467
+ .and_then(|v| String::try_convert(v).ok()),
2468
+ twitter_image: kwargs
2469
+ .get(ruby.to_symbol("twitter_image"))
2470
+ .and_then(|v| String::try_convert(v).ok()),
2471
+ twitter_site: kwargs
2472
+ .get(ruby.to_symbol("twitter_site"))
2473
+ .and_then(|v| String::try_convert(v).ok()),
2474
+ twitter_creator: kwargs
2475
+ .get(ruby.to_symbol("twitter_creator"))
2476
+ .and_then(|v| String::try_convert(v).ok()),
2477
+ dc_title: kwargs
2478
+ .get(ruby.to_symbol("dc_title"))
2479
+ .and_then(|v| String::try_convert(v).ok()),
2480
+ dc_creator: kwargs
2481
+ .get(ruby.to_symbol("dc_creator"))
2482
+ .and_then(|v| String::try_convert(v).ok()),
2483
+ dc_subject: kwargs
2484
+ .get(ruby.to_symbol("dc_subject"))
2485
+ .and_then(|v| String::try_convert(v).ok()),
2486
+ dc_description: kwargs
2487
+ .get(ruby.to_symbol("dc_description"))
2488
+ .and_then(|v| String::try_convert(v).ok()),
2489
+ dc_publisher: kwargs
2490
+ .get(ruby.to_symbol("dc_publisher"))
2491
+ .and_then(|v| String::try_convert(v).ok()),
2492
+ dc_date: kwargs
2493
+ .get(ruby.to_symbol("dc_date"))
2494
+ .and_then(|v| String::try_convert(v).ok()),
2495
+ dc_type: kwargs
2496
+ .get(ruby.to_symbol("dc_type"))
2497
+ .and_then(|v| String::try_convert(v).ok()),
2498
+ dc_format: kwargs
2499
+ .get(ruby.to_symbol("dc_format"))
2500
+ .and_then(|v| String::try_convert(v).ok()),
2501
+ dc_identifier: kwargs
2502
+ .get(ruby.to_symbol("dc_identifier"))
2503
+ .and_then(|v| String::try_convert(v).ok()),
2504
+ dc_language: kwargs
2505
+ .get(ruby.to_symbol("dc_language"))
2506
+ .and_then(|v| String::try_convert(v).ok()),
2507
+ dc_rights: kwargs
2508
+ .get(ruby.to_symbol("dc_rights"))
2509
+ .and_then(|v| String::try_convert(v).ok()),
2510
+ article: kwargs
2511
+ .get(ruby.to_symbol("article"))
2512
+ .and_then(|v| ArticleMetadata::try_convert(v).ok()),
2513
+ hreflangs: kwargs
2514
+ .get(ruby.to_symbol("hreflangs"))
2515
+ .and_then(|v| <Vec<HreflangEntry>>::try_convert(v).ok()),
2516
+ favicons: kwargs
2517
+ .get(ruby.to_symbol("favicons"))
2518
+ .and_then(|v| <Vec<FaviconInfo>>::try_convert(v).ok()),
2519
+ headings: kwargs
2520
+ .get(ruby.to_symbol("headings"))
2521
+ .and_then(|v| <Vec<HeadingInfo>>::try_convert(v).ok()),
2522
+ word_count: kwargs
2523
+ .get(ruby.to_symbol("word_count"))
2524
+ .and_then(|v| usize::try_convert(v).ok()),
2525
+ })
2526
+ }
2527
+
2528
+ fn title(&self) -> Option<String> {
2529
+ self.title.clone()
2530
+ }
2531
+
2532
+ fn description(&self) -> Option<String> {
2533
+ self.description.clone()
2534
+ }
2535
+
2536
+ fn canonical_url(&self) -> Option<String> {
2537
+ self.canonical_url.clone()
2538
+ }
2539
+
2540
+ fn keywords(&self) -> Option<String> {
2541
+ self.keywords.clone()
2542
+ }
2543
+
2544
+ fn author(&self) -> Option<String> {
2545
+ self.author.clone()
2546
+ }
2547
+
2548
+ fn viewport(&self) -> Option<String> {
2549
+ self.viewport.clone()
2550
+ }
2551
+
2552
+ fn theme_color(&self) -> Option<String> {
2553
+ self.theme_color.clone()
2554
+ }
2555
+
2556
+ fn generator(&self) -> Option<String> {
2557
+ self.generator.clone()
2558
+ }
2559
+
2560
+ fn robots(&self) -> Option<String> {
2561
+ self.robots.clone()
2562
+ }
2563
+
2564
+ fn html_lang(&self) -> Option<String> {
2565
+ self.html_lang.clone()
2566
+ }
2567
+
2568
+ fn html_dir(&self) -> Option<String> {
2569
+ self.html_dir.clone()
2570
+ }
2571
+
2572
+ fn og_title(&self) -> Option<String> {
2573
+ self.og_title.clone()
2574
+ }
2575
+
2576
+ fn og_type(&self) -> Option<String> {
2577
+ self.og_type.clone()
2578
+ }
2579
+
2580
+ fn og_image(&self) -> Option<String> {
2581
+ self.og_image.clone()
2582
+ }
2583
+
2584
+ fn og_description(&self) -> Option<String> {
2585
+ self.og_description.clone()
2586
+ }
2587
+
2588
+ fn og_url(&self) -> Option<String> {
2589
+ self.og_url.clone()
2590
+ }
2591
+
2592
+ fn og_site_name(&self) -> Option<String> {
2593
+ self.og_site_name.clone()
2594
+ }
2595
+
2596
+ fn og_locale(&self) -> Option<String> {
2597
+ self.og_locale.clone()
2598
+ }
2599
+
2600
+ fn og_video(&self) -> Option<String> {
2601
+ self.og_video.clone()
2602
+ }
2603
+
2604
+ fn og_audio(&self) -> Option<String> {
2605
+ self.og_audio.clone()
2606
+ }
2607
+
2608
+ fn og_locale_alternates(&self) -> Option<Vec<String>> {
2609
+ self.og_locale_alternates.clone()
2610
+ }
2611
+
2612
+ fn twitter_card(&self) -> Option<String> {
2613
+ self.twitter_card.clone()
2614
+ }
2615
+
2616
+ fn twitter_title(&self) -> Option<String> {
2617
+ self.twitter_title.clone()
2618
+ }
2619
+
2620
+ fn twitter_description(&self) -> Option<String> {
2621
+ self.twitter_description.clone()
2622
+ }
2623
+
2624
+ fn twitter_image(&self) -> Option<String> {
2625
+ self.twitter_image.clone()
2626
+ }
2627
+
2628
+ fn twitter_site(&self) -> Option<String> {
2629
+ self.twitter_site.clone()
2630
+ }
2631
+
2632
+ fn twitter_creator(&self) -> Option<String> {
2633
+ self.twitter_creator.clone()
2634
+ }
2635
+
2636
+ fn dc_title(&self) -> Option<String> {
2637
+ self.dc_title.clone()
2638
+ }
2639
+
2640
+ fn dc_creator(&self) -> Option<String> {
2641
+ self.dc_creator.clone()
2642
+ }
2643
+
2644
+ fn dc_subject(&self) -> Option<String> {
2645
+ self.dc_subject.clone()
2646
+ }
2647
+
2648
+ fn dc_description(&self) -> Option<String> {
2649
+ self.dc_description.clone()
2650
+ }
2651
+
2652
+ fn dc_publisher(&self) -> Option<String> {
2653
+ self.dc_publisher.clone()
2654
+ }
2655
+
2656
+ fn dc_date(&self) -> Option<String> {
2657
+ self.dc_date.clone()
2658
+ }
2659
+
2660
+ fn dc_type(&self) -> Option<String> {
2661
+ self.dc_type.clone()
2662
+ }
2663
+
2664
+ fn dc_format(&self) -> Option<String> {
2665
+ self.dc_format.clone()
2666
+ }
2667
+
2668
+ fn dc_identifier(&self) -> Option<String> {
2669
+ self.dc_identifier.clone()
2670
+ }
2671
+
2672
+ fn dc_language(&self) -> Option<String> {
2673
+ self.dc_language.clone()
2674
+ }
2675
+
2676
+ fn dc_rights(&self) -> Option<String> {
2677
+ self.dc_rights.clone()
2678
+ }
2679
+
2680
+ fn article(&self) -> Option<ArticleMetadata> {
2681
+ self.article.clone()
2682
+ }
2683
+
2684
+ fn hreflangs(&self) -> Option<Vec<HreflangEntry>> {
2685
+ self.hreflangs.clone()
2686
+ }
2687
+
2688
+ fn favicons(&self) -> Option<Vec<FaviconInfo>> {
2689
+ self.favicons.clone()
2690
+ }
2691
+
2692
+ fn headings(&self) -> Option<Vec<HeadingInfo>> {
2693
+ self.headings.clone()
2694
+ }
2695
+
2696
+ fn word_count(&self) -> Option<usize> {
2697
+ self.word_count
2698
+ }
2699
+ }
2700
+
2701
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2702
+ #[magnus::wrap(class = "Kreuzcrawl::CitationResult")]
2703
+ #[serde(default)]
2704
+ pub struct CitationResult {
2705
+ pub content: String,
2706
+ pub references: Vec<CitationReference>,
2707
+ }
2708
+
2709
+ unsafe impl IntoValueFromNative for CitationResult {}
2710
+
2711
+ impl magnus::TryConvert for CitationResult {
2712
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2713
+ let r: &CitationResult = magnus::TryConvert::try_convert(val)?;
2714
+ Ok(r.clone())
2715
+ }
2716
+ }
2717
+ unsafe impl TryConvertOwned for CitationResult {}
2718
+
2719
+ impl Default for CitationResult {
2720
+ fn default() -> Self {
2721
+ Self {
2722
+ content: Default::default(),
2723
+ references: Default::default(),
2724
+ }
2725
+ }
2726
+ }
2727
+
2728
+ impl CitationResult {
2729
+ fn new(content: Option<String>, references: Option<Vec<CitationReference>>) -> Self {
2730
+ Self {
2731
+ content: content.unwrap_or_default(),
2732
+ references: references.unwrap_or_default(),
2733
+ }
2734
+ }
2735
+
2736
+ fn content(&self) -> String {
2737
+ self.content.clone()
2738
+ }
2739
+
2740
+ fn references(&self) -> Vec<CitationReference> {
2741
+ self.references.clone()
2742
+ }
2743
+ }
2744
+
2745
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2746
+ #[magnus::wrap(class = "Kreuzcrawl::CitationReference")]
2747
+ #[serde(default)]
2748
+ pub struct CitationReference {
2749
+ pub index: usize,
2750
+ pub url: String,
2751
+ pub text: String,
2752
+ }
2753
+
2754
+ unsafe impl IntoValueFromNative for CitationReference {}
2755
+
2756
+ impl magnus::TryConvert for CitationReference {
2757
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2758
+ let r: &CitationReference = magnus::TryConvert::try_convert(val)?;
2759
+ Ok(r.clone())
2760
+ }
2761
+ }
2762
+ unsafe impl TryConvertOwned for CitationReference {}
2763
+
2764
+ impl Default for CitationReference {
2765
+ fn default() -> Self {
2766
+ Self {
2767
+ index: Default::default(),
2768
+ url: Default::default(),
2769
+ text: Default::default(),
2770
+ }
2771
+ }
2772
+ }
2773
+
2774
+ impl CitationReference {
2775
+ fn new(index: Option<usize>, url: Option<String>, text: Option<String>) -> Self {
2776
+ Self {
2777
+ index: index.unwrap_or_default(),
2778
+ url: url.unwrap_or_default(),
2779
+ text: text.unwrap_or_default(),
2780
+ }
2781
+ }
2782
+
2783
+ fn index(&self) -> usize {
2784
+ self.index
2785
+ }
2786
+
2787
+ fn url(&self) -> String {
2788
+ self.url.clone()
2789
+ }
2790
+
2791
+ fn text(&self) -> String {
2792
+ self.text.clone()
2793
+ }
2794
+ }
2795
+
2796
+ #[derive(Clone)]
2797
+ #[magnus::wrap(class = "Kreuzcrawl::CrawlEngineHandle")]
2798
+ pub struct CrawlEngineHandle {
2799
+ inner: Arc<kreuzcrawl::CrawlEngineHandle>,
2800
+ }
2801
+
2802
+ unsafe impl IntoValueFromNative for CrawlEngineHandle {}
2803
+
2804
+ impl magnus::TryConvert for CrawlEngineHandle {
2805
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2806
+ let r: &CrawlEngineHandle = magnus::TryConvert::try_convert(val)?;
2807
+ Ok(r.clone())
2808
+ }
2809
+ }
2810
+ unsafe impl TryConvertOwned for CrawlEngineHandle {}
2811
+
2812
+ impl CrawlEngineHandle {}
2813
+
2814
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2815
+ #[magnus::wrap(class = "Kreuzcrawl::BatchScrapeResult")]
2816
+ #[serde(default)]
2817
+ pub struct BatchScrapeResult {
2818
+ pub url: String,
2819
+ pub result: Option<ScrapeResult>,
2820
+ pub error: Option<String>,
2821
+ }
2822
+
2823
+ unsafe impl IntoValueFromNative for BatchScrapeResult {}
2824
+
2825
+ impl magnus::TryConvert for BatchScrapeResult {
2826
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2827
+ let r: &BatchScrapeResult = magnus::TryConvert::try_convert(val)?;
2828
+ Ok(r.clone())
2829
+ }
2830
+ }
2831
+ unsafe impl TryConvertOwned for BatchScrapeResult {}
2832
+
2833
+ impl Default for BatchScrapeResult {
2834
+ fn default() -> Self {
2835
+ Self {
2836
+ url: Default::default(),
2837
+ result: Default::default(),
2838
+ error: Default::default(),
2839
+ }
2840
+ }
2841
+ }
2842
+
2843
+ impl BatchScrapeResult {
2844
+ fn new(url: Option<String>, result: Option<ScrapeResult>, error: Option<String>) -> Self {
2845
+ Self {
2846
+ url: url.unwrap_or_default(),
2847
+ result,
2848
+ error,
2849
+ }
2850
+ }
2851
+
2852
+ fn url(&self) -> String {
2853
+ self.url.clone()
2854
+ }
2855
+
2856
+ fn result(&self) -> Option<ScrapeResult> {
2857
+ self.result.clone()
2858
+ }
2859
+
2860
+ fn error(&self) -> Option<String> {
2861
+ self.error.clone()
2862
+ }
2863
+ }
2864
+
2865
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
2866
+ #[magnus::wrap(class = "Kreuzcrawl::BatchCrawlResult")]
2867
+ #[serde(default)]
2868
+ pub struct BatchCrawlResult {
2869
+ pub url: String,
2870
+ pub result: Option<CrawlResult>,
2871
+ pub error: Option<String>,
2872
+ }
2873
+
2874
+ unsafe impl IntoValueFromNative for BatchCrawlResult {}
2875
+
2876
+ impl magnus::TryConvert for BatchCrawlResult {
2877
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2878
+ let r: &BatchCrawlResult = magnus::TryConvert::try_convert(val)?;
2879
+ Ok(r.clone())
2880
+ }
2881
+ }
2882
+ unsafe impl TryConvertOwned for BatchCrawlResult {}
2883
+
2884
+ impl Default for BatchCrawlResult {
2885
+ fn default() -> Self {
2886
+ Self {
2887
+ url: Default::default(),
2888
+ result: Default::default(),
2889
+ error: Default::default(),
2890
+ }
2891
+ }
2892
+ }
2893
+
2894
+ impl BatchCrawlResult {
2895
+ fn new(url: Option<String>, result: Option<CrawlResult>, error: Option<String>) -> Self {
2896
+ Self {
2897
+ url: url.unwrap_or_default(),
2898
+ result,
2899
+ error,
2900
+ }
2901
+ }
2902
+
2903
+ fn url(&self) -> String {
2904
+ self.url.clone()
2905
+ }
2906
+
2907
+ fn result(&self) -> Option<CrawlResult> {
2908
+ self.result.clone()
2909
+ }
2910
+
2911
+ fn error(&self) -> Option<String> {
2912
+ self.error.clone()
2913
+ }
2914
+ }
2915
+
2916
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
2917
+ pub enum BrowserMode {
2918
+ Auto,
2919
+ Always,
2920
+ Never,
2921
+ }
2922
+
2923
+ impl Default for BrowserMode {
2924
+ fn default() -> Self {
2925
+ Self::Auto
2926
+ }
2927
+ }
2928
+
2929
+ impl magnus::IntoValue for BrowserMode {
2930
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
2931
+ let sym = match self {
2932
+ BrowserMode::Auto => "auto",
2933
+ BrowserMode::Always => "always",
2934
+ BrowserMode::Never => "never",
2935
+ };
2936
+ handle.to_symbol(sym).into_value_with(handle)
2937
+ }
2938
+ }
2939
+
2940
+ impl magnus::TryConvert for BrowserMode {
2941
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2942
+ let s: String = magnus::TryConvert::try_convert(val)?;
2943
+ match s.as_str() {
2944
+ "auto" => Ok(BrowserMode::Auto),
2945
+ "always" => Ok(BrowserMode::Always),
2946
+ "never" => Ok(BrowserMode::Never),
2947
+ other => Err(magnus::Error::new(
2948
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
2949
+ format!("invalid BrowserMode value: {other}"),
2950
+ )),
2951
+ }
2952
+ }
2953
+ }
2954
+
2955
+ unsafe impl IntoValueFromNative for BrowserMode {}
2956
+ unsafe impl TryConvertOwned for BrowserMode {}
2957
+
2958
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
2959
+ pub enum BrowserWait {
2960
+ NetworkIdle,
2961
+ Selector,
2962
+ Fixed,
2963
+ }
2964
+
2965
+ impl Default for BrowserWait {
2966
+ fn default() -> Self {
2967
+ Self::NetworkIdle
2968
+ }
2969
+ }
2970
+
2971
+ impl magnus::IntoValue for BrowserWait {
2972
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
2973
+ let sym = match self {
2974
+ BrowserWait::NetworkIdle => "network_idle",
2975
+ BrowserWait::Selector => "selector",
2976
+ BrowserWait::Fixed => "fixed",
2977
+ };
2978
+ handle.to_symbol(sym).into_value_with(handle)
2979
+ }
2980
+ }
2981
+
2982
+ impl magnus::TryConvert for BrowserWait {
2983
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
2984
+ let s: String = magnus::TryConvert::try_convert(val)?;
2985
+ match s.as_str() {
2986
+ "network_idle" => Ok(BrowserWait::NetworkIdle),
2987
+ "selector" => Ok(BrowserWait::Selector),
2988
+ "fixed" => Ok(BrowserWait::Fixed),
2989
+ other => Err(magnus::Error::new(
2990
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
2991
+ format!("invalid BrowserWait value: {other}"),
2992
+ )),
2993
+ }
2994
+ }
2995
+ }
2996
+
2997
+ unsafe impl IntoValueFromNative for BrowserWait {}
2998
+ unsafe impl TryConvertOwned for BrowserWait {}
2999
+
3000
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
3001
+ #[serde(tag = "type")]
3002
+ pub enum AuthConfig {
3003
+ #[serde(rename = "basic")]
3004
+ Basic { username: String, password: String },
3005
+ #[serde(rename = "bearer")]
3006
+ Bearer { token: String },
3007
+ #[serde(rename = "header")]
3008
+ Header { name: String, value: String },
3009
+ }
3010
+
3011
+ impl Default for AuthConfig {
3012
+ fn default() -> Self {
3013
+ Self::Basic {
3014
+ username: Default::default(),
3015
+ password: Default::default(),
3016
+ }
3017
+ }
3018
+ }
3019
+
3020
+ impl magnus::IntoValue for AuthConfig {
3021
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3022
+ match serde_json::to_value(&self) {
3023
+ Ok(v) => json_to_ruby(handle, v),
3024
+ Err(_) => handle.qnil().into_value_with(handle),
3025
+ }
3026
+ }
3027
+ }
3028
+
3029
+ impl magnus::TryConvert for AuthConfig {
3030
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3031
+ let s: String = magnus::TryConvert::try_convert(val)?;
3032
+ serde_json::from_str(&s)
3033
+ .map_err(|e| magnus::Error::new(unsafe { Ruby::get_unchecked() }.exception_type_error(), e.to_string()))
3034
+ }
3035
+ }
3036
+
3037
+ unsafe impl IntoValueFromNative for AuthConfig {}
3038
+ unsafe impl TryConvertOwned for AuthConfig {}
3039
+
3040
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
3041
+ pub enum LinkType {
3042
+ Internal,
3043
+ External,
3044
+ Anchor,
3045
+ Document,
3046
+ }
3047
+
3048
+ impl Default for LinkType {
3049
+ fn default() -> Self {
3050
+ Self::Internal
3051
+ }
3052
+ }
3053
+
3054
+ impl magnus::IntoValue for LinkType {
3055
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3056
+ let sym = match self {
3057
+ LinkType::Internal => "internal",
3058
+ LinkType::External => "external",
3059
+ LinkType::Anchor => "anchor",
3060
+ LinkType::Document => "document",
3061
+ };
3062
+ handle.to_symbol(sym).into_value_with(handle)
3063
+ }
3064
+ }
3065
+
3066
+ impl magnus::TryConvert for LinkType {
3067
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3068
+ let s: String = magnus::TryConvert::try_convert(val)?;
3069
+ match s.as_str() {
3070
+ "internal" => Ok(LinkType::Internal),
3071
+ "external" => Ok(LinkType::External),
3072
+ "anchor" => Ok(LinkType::Anchor),
3073
+ "document" => Ok(LinkType::Document),
3074
+ other => Err(magnus::Error::new(
3075
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
3076
+ format!("invalid LinkType value: {other}"),
3077
+ )),
3078
+ }
3079
+ }
3080
+ }
3081
+
3082
+ unsafe impl IntoValueFromNative for LinkType {}
3083
+ unsafe impl TryConvertOwned for LinkType {}
3084
+
3085
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
3086
+ pub enum ImageSource {
3087
+ Img,
3088
+ PictureSource,
3089
+ #[serde(rename = "og:image")]
3090
+ OgImage,
3091
+ #[serde(rename = "twitter:image")]
3092
+ TwitterImage,
3093
+ }
3094
+
3095
+ impl Default for ImageSource {
3096
+ fn default() -> Self {
3097
+ Self::Img
3098
+ }
3099
+ }
3100
+
3101
+ impl magnus::IntoValue for ImageSource {
3102
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3103
+ let sym = match self {
3104
+ ImageSource::Img => "img",
3105
+ ImageSource::PictureSource => "picture_source",
3106
+ ImageSource::OgImage => "og_image",
3107
+ ImageSource::TwitterImage => "twitter_image",
3108
+ };
3109
+ handle.to_symbol(sym).into_value_with(handle)
3110
+ }
3111
+ }
3112
+
3113
+ impl magnus::TryConvert for ImageSource {
3114
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3115
+ let s: String = magnus::TryConvert::try_convert(val)?;
3116
+ match s.as_str() {
3117
+ "img" => Ok(ImageSource::Img),
3118
+ "picture_source" => Ok(ImageSource::PictureSource),
3119
+ "og_image" => Ok(ImageSource::OgImage),
3120
+ "twitter_image" => Ok(ImageSource::TwitterImage),
3121
+ other => Err(magnus::Error::new(
3122
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
3123
+ format!("invalid ImageSource value: {other}"),
3124
+ )),
3125
+ }
3126
+ }
3127
+ }
3128
+
3129
+ unsafe impl IntoValueFromNative for ImageSource {}
3130
+ unsafe impl TryConvertOwned for ImageSource {}
3131
+
3132
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
3133
+ pub enum FeedType {
3134
+ Rss,
3135
+ Atom,
3136
+ JsonFeed,
3137
+ }
3138
+
3139
+ impl Default for FeedType {
3140
+ fn default() -> Self {
3141
+ Self::Rss
3142
+ }
3143
+ }
3144
+
3145
+ impl magnus::IntoValue for FeedType {
3146
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3147
+ let sym = match self {
3148
+ FeedType::Rss => "rss",
3149
+ FeedType::Atom => "atom",
3150
+ FeedType::JsonFeed => "json_feed",
3151
+ };
3152
+ handle.to_symbol(sym).into_value_with(handle)
3153
+ }
3154
+ }
3155
+
3156
+ impl magnus::TryConvert for FeedType {
3157
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3158
+ let s: String = magnus::TryConvert::try_convert(val)?;
3159
+ match s.as_str() {
3160
+ "rss" => Ok(FeedType::Rss),
3161
+ "atom" => Ok(FeedType::Atom),
3162
+ "json_feed" => Ok(FeedType::JsonFeed),
3163
+ other => Err(magnus::Error::new(
3164
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
3165
+ format!("invalid FeedType value: {other}"),
3166
+ )),
3167
+ }
3168
+ }
3169
+ }
3170
+
3171
+ unsafe impl IntoValueFromNative for FeedType {}
3172
+ unsafe impl TryConvertOwned for FeedType {}
3173
+
3174
+ #[derive(Clone, Copy, PartialEq, Eq, Debug, serde::Serialize, serde::Deserialize)]
3175
+ pub enum AssetCategory {
3176
+ Document,
3177
+ Image,
3178
+ Audio,
3179
+ Video,
3180
+ Font,
3181
+ Stylesheet,
3182
+ Script,
3183
+ Archive,
3184
+ Data,
3185
+ Other,
3186
+ }
3187
+
3188
+ impl Default for AssetCategory {
3189
+ fn default() -> Self {
3190
+ Self::Document
3191
+ }
3192
+ }
3193
+
3194
+ impl magnus::IntoValue for AssetCategory {
3195
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3196
+ let sym = match self {
3197
+ AssetCategory::Document => "document",
3198
+ AssetCategory::Image => "image",
3199
+ AssetCategory::Audio => "audio",
3200
+ AssetCategory::Video => "video",
3201
+ AssetCategory::Font => "font",
3202
+ AssetCategory::Stylesheet => "stylesheet",
3203
+ AssetCategory::Script => "script",
3204
+ AssetCategory::Archive => "archive",
3205
+ AssetCategory::Data => "data",
3206
+ AssetCategory::Other => "other",
3207
+ };
3208
+ handle.to_symbol(sym).into_value_with(handle)
3209
+ }
3210
+ }
3211
+
3212
+ impl magnus::TryConvert for AssetCategory {
3213
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3214
+ let s: String = magnus::TryConvert::try_convert(val)?;
3215
+ match s.as_str() {
3216
+ "document" => Ok(AssetCategory::Document),
3217
+ "image" => Ok(AssetCategory::Image),
3218
+ "audio" => Ok(AssetCategory::Audio),
3219
+ "video" => Ok(AssetCategory::Video),
3220
+ "font" => Ok(AssetCategory::Font),
3221
+ "stylesheet" => Ok(AssetCategory::Stylesheet),
3222
+ "script" => Ok(AssetCategory::Script),
3223
+ "archive" => Ok(AssetCategory::Archive),
3224
+ "data" => Ok(AssetCategory::Data),
3225
+ "other" => Ok(AssetCategory::Other),
3226
+ other => Err(magnus::Error::new(
3227
+ unsafe { Ruby::get_unchecked() }.exception_arg_error(),
3228
+ format!("invalid AssetCategory value: {other}"),
3229
+ )),
3230
+ }
3231
+ }
3232
+ }
3233
+
3234
+ unsafe impl IntoValueFromNative for AssetCategory {}
3235
+ unsafe impl TryConvertOwned for AssetCategory {}
3236
+
3237
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
3238
+ pub enum CrawlEvent {
3239
+ Page { _0: CrawlPageResult },
3240
+ Error { url: String, error: String },
3241
+ Complete { pages_crawled: usize },
3242
+ }
3243
+
3244
+ impl Default for CrawlEvent {
3245
+ fn default() -> Self {
3246
+ Self::Page { _0: Default::default() }
3247
+ }
3248
+ }
3249
+
3250
+ impl magnus::IntoValue for CrawlEvent {
3251
+ fn into_value_with(self, handle: &Ruby) -> magnus::Value {
3252
+ match serde_json::to_value(&self) {
3253
+ Ok(v) => json_to_ruby(handle, v),
3254
+ Err(_) => handle.qnil().into_value_with(handle),
3255
+ }
3256
+ }
3257
+ }
3258
+
3259
+ impl magnus::TryConvert for CrawlEvent {
3260
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
3261
+ let s: String = magnus::TryConvert::try_convert(val)?;
3262
+ serde_json::from_str(&s)
3263
+ .map_err(|e| magnus::Error::new(unsafe { Ruby::get_unchecked() }.exception_type_error(), e.to_string()))
3264
+ }
3265
+ }
3266
+
3267
// SAFETY(review): both marker traits are `unsafe` to implement. `CrawlEvent`
// holds a `CrawlPageResult`, `String`s and a `usize`; this presumes
// `CrawlPageResult` contains no Ruby values — confirm against its definition.
unsafe impl IntoValueFromNative for CrawlEvent {}
unsafe impl TryConvertOwned for CrawlEvent {}
3269
+
3270
+ fn create_engine(config: Option<String>) -> Result<CrawlEngineHandle, Error> {
3271
+ let config: Option<CrawlConfig> = config
3272
+ .as_deref()
3273
+ .filter(|s| *s != "nil")
3274
+ .map(|s| {
3275
+ let core: kreuzcrawl::CrawlConfig = serde_json::from_str(s).map_err(|e| {
3276
+ magnus::Error::new(unsafe { Ruby::get_unchecked() }.exception_type_error(), e.to_string())
3277
+ })?;
3278
+ Ok::<_, magnus::Error>(core.into())
3279
+ })
3280
+ .transpose()?;
3281
+ let result = kreuzcrawl::create_engine(config.map(Into::into)).map_err(|e| {
3282
+ magnus::Error::new(
3283
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3284
+ e.to_string(),
3285
+ )
3286
+ })?;
3287
+ Ok(CrawlEngineHandle {
3288
+ inner: Arc::new(result),
3289
+ })
3290
+ }
3291
+
3292
+ fn scrape(engine: CrawlEngineHandle, url: String) -> Result<ScrapeResult, Error> {
3293
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3294
+ magnus::Error::new(
3295
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3296
+ e.to_string(),
3297
+ )
3298
+ })?;
3299
+ let result = rt
3300
+ .block_on(async { kreuzcrawl::scrape(&engine.inner, &url).await })
3301
+ .map_err(|e| {
3302
+ magnus::Error::new(
3303
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3304
+ e.to_string(),
3305
+ )
3306
+ })?;
3307
+ Ok(result.into())
3308
+ }
3309
+
3310
+ fn scrape_async(engine: CrawlEngineHandle, url: String) -> Result<ScrapeResult, Error> {
3311
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3312
+ magnus::Error::new(
3313
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3314
+ e.to_string(),
3315
+ )
3316
+ })?;
3317
+ let result = rt
3318
+ .block_on(async { kreuzcrawl::scrape(&engine.inner, &url).await })
3319
+ .map_err(|e| {
3320
+ magnus::Error::new(
3321
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3322
+ e.to_string(),
3323
+ )
3324
+ })?;
3325
+ Ok(result.into())
3326
+ }
3327
+
3328
+ fn crawl(engine: CrawlEngineHandle, url: String) -> Result<CrawlResult, Error> {
3329
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3330
+ magnus::Error::new(
3331
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3332
+ e.to_string(),
3333
+ )
3334
+ })?;
3335
+ let result = rt
3336
+ .block_on(async { kreuzcrawl::crawl(&engine.inner, &url).await })
3337
+ .map_err(|e| {
3338
+ magnus::Error::new(
3339
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3340
+ e.to_string(),
3341
+ )
3342
+ })?;
3343
+ Ok(result.into())
3344
+ }
3345
+
3346
+ fn crawl_async(engine: CrawlEngineHandle, url: String) -> Result<CrawlResult, Error> {
3347
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3348
+ magnus::Error::new(
3349
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3350
+ e.to_string(),
3351
+ )
3352
+ })?;
3353
+ let result = rt
3354
+ .block_on(async { kreuzcrawl::crawl(&engine.inner, &url).await })
3355
+ .map_err(|e| {
3356
+ magnus::Error::new(
3357
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3358
+ e.to_string(),
3359
+ )
3360
+ })?;
3361
+ Ok(result.into())
3362
+ }
3363
+
3364
+ fn map_urls(engine: CrawlEngineHandle, url: String) -> Result<MapResult, Error> {
3365
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3366
+ magnus::Error::new(
3367
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3368
+ e.to_string(),
3369
+ )
3370
+ })?;
3371
+ let result = rt
3372
+ .block_on(async { kreuzcrawl::map_urls(&engine.inner, &url).await })
3373
+ .map_err(|e| {
3374
+ magnus::Error::new(
3375
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3376
+ e.to_string(),
3377
+ )
3378
+ })?;
3379
+ Ok(result.into())
3380
+ }
3381
+
3382
+ fn map_urls_async(engine: CrawlEngineHandle, url: String) -> Result<MapResult, Error> {
3383
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3384
+ magnus::Error::new(
3385
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3386
+ e.to_string(),
3387
+ )
3388
+ })?;
3389
+ let result = rt
3390
+ .block_on(async { kreuzcrawl::map_urls(&engine.inner, &url).await })
3391
+ .map_err(|e| {
3392
+ magnus::Error::new(
3393
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3394
+ e.to_string(),
3395
+ )
3396
+ })?;
3397
+ Ok(result.into())
3398
+ }
3399
+
3400
+ fn batch_scrape(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec<BatchScrapeResult>, Error> {
3401
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3402
+ magnus::Error::new(
3403
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3404
+ e.to_string(),
3405
+ )
3406
+ })?;
3407
+ let result = rt.block_on(async { kreuzcrawl::batch_scrape(&engine.inner, urls).await });
3408
+ Ok(result.into_iter().map(Into::into).collect())
3409
+ }
3410
+
3411
+ fn batch_scrape_async(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec<BatchScrapeResult>, Error> {
3412
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3413
+ magnus::Error::new(
3414
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3415
+ e.to_string(),
3416
+ )
3417
+ })?;
3418
+ let result = rt.block_on(async { kreuzcrawl::batch_scrape(&engine.inner, urls).await });
3419
+ Ok(result.into_iter().map(Into::into).collect())
3420
+ }
3421
+
3422
+ fn batch_crawl(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec<BatchCrawlResult>, Error> {
3423
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3424
+ magnus::Error::new(
3425
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3426
+ e.to_string(),
3427
+ )
3428
+ })?;
3429
+ let result = rt.block_on(async { kreuzcrawl::batch_crawl(&engine.inner, urls).await });
3430
+ Ok(result.into_iter().map(Into::into).collect())
3431
+ }
3432
+
3433
+ fn batch_crawl_async(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec<BatchCrawlResult>, Error> {
3434
+ let rt = tokio::runtime::Runtime::new().map_err(|e| {
3435
+ magnus::Error::new(
3436
+ unsafe { Ruby::get_unchecked() }.exception_runtime_error(),
3437
+ e.to_string(),
3438
+ )
3439
+ })?;
3440
+ let result = rt.block_on(async { kreuzcrawl::batch_crawl(&engine.inner, urls).await });
3441
+ Ok(result.into_iter().map(Into::into).collect())
3442
+ }
3443
+
3444
+ impl From<ExtractionMeta> for kreuzcrawl::ExtractionMeta {
3445
+ fn from(val: ExtractionMeta) -> Self {
3446
+ Self {
3447
+ cost: val.cost,
3448
+ prompt_tokens: val.prompt_tokens,
3449
+ completion_tokens: val.completion_tokens,
3450
+ model: val.model,
3451
+ chunks_processed: val.chunks_processed,
3452
+ }
3453
+ }
3454
+ }
3455
+
3456
+ impl From<kreuzcrawl::ExtractionMeta> for ExtractionMeta {
3457
+ fn from(val: kreuzcrawl::ExtractionMeta) -> Self {
3458
+ Self {
3459
+ cost: val.cost,
3460
+ prompt_tokens: val.prompt_tokens,
3461
+ completion_tokens: val.completion_tokens,
3462
+ model: val.model,
3463
+ chunks_processed: val.chunks_processed,
3464
+ }
3465
+ }
3466
+ }
3467
+
3468
+ impl From<ProxyConfig> for kreuzcrawl::ProxyConfig {
3469
+ fn from(val: ProxyConfig) -> Self {
3470
+ Self {
3471
+ url: val.url,
3472
+ username: val.username,
3473
+ password: val.password,
3474
+ }
3475
+ }
3476
+ }
3477
+
3478
+ impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
3479
+ fn from(val: kreuzcrawl::ProxyConfig) -> Self {
3480
+ Self {
3481
+ url: val.url,
3482
+ username: val.username,
3483
+ password: val.password,
3484
+ }
3485
+ }
3486
+ }
3487
+
3488
+ impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
3489
+ fn from(val: BrowserConfig) -> Self {
3490
+ Self {
3491
+ mode: val.mode.into(),
3492
+ endpoint: val.endpoint,
3493
+ timeout: std::time::Duration::from_millis(val.timeout),
3494
+ wait: val.wait.into(),
3495
+ wait_selector: val.wait_selector,
3496
+ extra_wait: val.extra_wait.map(std::time::Duration::from_millis),
3497
+ }
3498
+ }
3499
+ }
3500
+
3501
+ impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
3502
+ fn from(val: kreuzcrawl::BrowserConfig) -> Self {
3503
+ Self {
3504
+ mode: val.mode.into(),
3505
+ endpoint: val.endpoint,
3506
+ timeout: val.timeout.as_millis() as u64,
3507
+ wait: val.wait.into(),
3508
+ wait_selector: val.wait_selector,
3509
+ extra_wait: val.extra_wait.map(|d| d.as_millis() as u64),
3510
+ }
3511
+ }
3512
+ }
3513
+
3514
+ #[allow(clippy::needless_update)]
3515
+ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
3516
+ fn from(val: CrawlConfig) -> Self {
3517
+ Self {
3518
+ max_depth: val.max_depth,
3519
+ max_pages: val.max_pages,
3520
+ max_concurrent: val.max_concurrent,
3521
+ respect_robots_txt: val.respect_robots_txt,
3522
+ user_agent: val.user_agent,
3523
+ stay_on_domain: val.stay_on_domain,
3524
+ allow_subdomains: val.allow_subdomains,
3525
+ include_paths: val.include_paths,
3526
+ exclude_paths: val.exclude_paths,
3527
+ custom_headers: val.custom_headers.into_iter().collect(),
3528
+ request_timeout: std::time::Duration::from_millis(val.request_timeout),
3529
+ max_redirects: val.max_redirects,
3530
+ retry_count: val.retry_count,
3531
+ retry_codes: val.retry_codes,
3532
+ cookies_enabled: val.cookies_enabled,
3533
+ auth: val.auth.map(Into::into),
3534
+ max_body_size: val.max_body_size,
3535
+ main_content_only: val.main_content_only,
3536
+ remove_tags: val.remove_tags,
3537
+ map_limit: val.map_limit,
3538
+ map_search: val.map_search,
3539
+ download_assets: val.download_assets,
3540
+ asset_types: val.asset_types.into_iter().map(Into::into).collect(),
3541
+ max_asset_size: val.max_asset_size,
3542
+ browser: val.browser.into(),
3543
+ proxy: val.proxy.map(Into::into),
3544
+ user_agents: val.user_agents,
3545
+ capture_screenshot: val.capture_screenshot,
3546
+ download_documents: val.download_documents,
3547
+ document_max_size: val.document_max_size,
3548
+ document_mime_types: val.document_mime_types,
3549
+ warc_output: val.warc_output.map(Into::into),
3550
+ browser_profile: val.browser_profile,
3551
+ save_browser_profile: val.save_browser_profile,
3552
+ ..Default::default()
3553
+ }
3554
+ }
3555
+ }
3556
+
3557
+ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
3558
+ fn from(val: kreuzcrawl::CrawlConfig) -> Self {
3559
+ Self {
3560
+ max_depth: val.max_depth,
3561
+ max_pages: val.max_pages,
3562
+ max_concurrent: val.max_concurrent,
3563
+ respect_robots_txt: val.respect_robots_txt,
3564
+ user_agent: val.user_agent,
3565
+ stay_on_domain: val.stay_on_domain,
3566
+ allow_subdomains: val.allow_subdomains,
3567
+ include_paths: val.include_paths,
3568
+ exclude_paths: val.exclude_paths,
3569
+ custom_headers: val.custom_headers.into_iter().collect(),
3570
+ request_timeout: val.request_timeout.as_millis() as u64,
3571
+ max_redirects: val.max_redirects,
3572
+ retry_count: val.retry_count,
3573
+ retry_codes: val.retry_codes,
3574
+ cookies_enabled: val.cookies_enabled,
3575
+ auth: val.auth.map(Into::into),
3576
+ max_body_size: val.max_body_size,
3577
+ main_content_only: val.main_content_only,
3578
+ remove_tags: val.remove_tags,
3579
+ map_limit: val.map_limit,
3580
+ map_search: val.map_search,
3581
+ download_assets: val.download_assets,
3582
+ asset_types: val.asset_types.into_iter().map(Into::into).collect(),
3583
+ max_asset_size: val.max_asset_size,
3584
+ browser: val.browser.into(),
3585
+ proxy: val.proxy.map(Into::into),
3586
+ user_agents: val.user_agents,
3587
+ capture_screenshot: val.capture_screenshot,
3588
+ download_documents: val.download_documents,
3589
+ document_max_size: val.document_max_size,
3590
+ document_mime_types: val.document_mime_types,
3591
+ warc_output: val.warc_output.map(|p| p.to_string_lossy().to_string()),
3592
+ browser_profile: val.browser_profile,
3593
+ save_browser_profile: val.save_browser_profile,
3594
+ }
3595
+ }
3596
+ }
3597
+
3598
+ impl From<DownloadedDocument> for kreuzcrawl::DownloadedDocument {
3599
+ fn from(val: DownloadedDocument) -> Self {
3600
+ Self {
3601
+ url: val.url,
3602
+ mime_type: Default::default(),
3603
+ content: val.content,
3604
+ size: val.size,
3605
+ filename: Default::default(),
3606
+ content_hash: Default::default(),
3607
+ headers: Default::default(),
3608
+ }
3609
+ }
3610
+ }
3611
+
3612
+ impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
3613
+ fn from(val: kreuzcrawl::DownloadedDocument) -> Self {
3614
+ Self {
3615
+ url: val.url,
3616
+ mime_type: format!("{:?}", val.mime_type),
3617
+ content: val.content.to_vec(),
3618
+ size: val.size,
3619
+ filename: val.filename.as_ref().map(|v| format!("{:?}", v)),
3620
+ content_hash: format!("{:?}", val.content_hash),
3621
+ headers: val
3622
+ .headers
3623
+ .into_iter()
3624
+ .map(|(k, v)| (format!("{:?}", k), format!("{:?}", v)))
3625
+ .collect(),
3626
+ }
3627
+ }
3628
+ }
3629
+
3630
+ impl From<kreuzcrawl::InteractionResult> for InteractionResult {
3631
+ fn from(val: kreuzcrawl::InteractionResult) -> Self {
3632
+ Self {
3633
+ action_results: val.action_results.into_iter().map(Into::into).collect(),
3634
+ final_html: val.final_html,
3635
+ final_url: val.final_url,
3636
+ screenshot: val.screenshot.map(|v| v.to_vec()),
3637
+ }
3638
+ }
3639
+ }
3640
+
3641
+ impl From<kreuzcrawl::ActionResult> for ActionResult {
3642
+ fn from(val: kreuzcrawl::ActionResult) -> Self {
3643
+ Self {
3644
+ action_index: val.action_index,
3645
+ action_type: format!("{:?}", val.action_type),
3646
+ success: val.success,
3647
+ data: val.data.as_ref().map(ToString::to_string),
3648
+ error: val.error,
3649
+ }
3650
+ }
3651
+ }
3652
+
3653
+ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
3654
+ fn from(val: ScrapeResult) -> Self {
3655
+ Self {
3656
+ status_code: val.status_code,
3657
+ content_type: val.content_type,
3658
+ html: val.html,
3659
+ body_size: val.body_size,
3660
+ metadata: val.metadata.into(),
3661
+ links: val.links.into_iter().map(Into::into).collect(),
3662
+ images: val.images.into_iter().map(Into::into).collect(),
3663
+ feeds: val.feeds.into_iter().map(Into::into).collect(),
3664
+ json_ld: val.json_ld.into_iter().map(Into::into).collect(),
3665
+ is_allowed: val.is_allowed,
3666
+ crawl_delay: val.crawl_delay,
3667
+ noindex_detected: val.noindex_detected,
3668
+ nofollow_detected: val.nofollow_detected,
3669
+ x_robots_tag: val.x_robots_tag,
3670
+ is_pdf: val.is_pdf,
3671
+ was_skipped: val.was_skipped,
3672
+ detected_charset: val.detected_charset,
3673
+ main_content_only: val.main_content_only,
3674
+ auth_header_sent: val.auth_header_sent,
3675
+ response_meta: val.response_meta.map(Into::into),
3676
+ assets: val.assets.into_iter().map(Into::into).collect(),
3677
+ js_render_hint: val.js_render_hint,
3678
+ browser_used: val.browser_used,
3679
+ markdown: val.markdown.map(Into::into),
3680
+ extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
3681
+ extraction_meta: val.extraction_meta.map(Into::into),
3682
+ screenshot: val.screenshot,
3683
+ downloaded_document: val.downloaded_document.map(Into::into),
3684
+ }
3685
+ }
3686
+ }
3687
+
3688
+ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
3689
+ fn from(val: kreuzcrawl::ScrapeResult) -> Self {
3690
+ Self {
3691
+ status_code: val.status_code,
3692
+ content_type: val.content_type,
3693
+ html: val.html,
3694
+ body_size: val.body_size,
3695
+ metadata: val.metadata.into(),
3696
+ links: val.links.into_iter().map(Into::into).collect(),
3697
+ images: val.images.into_iter().map(Into::into).collect(),
3698
+ feeds: val.feeds.into_iter().map(Into::into).collect(),
3699
+ json_ld: val.json_ld.into_iter().map(Into::into).collect(),
3700
+ is_allowed: val.is_allowed,
3701
+ crawl_delay: val.crawl_delay,
3702
+ noindex_detected: val.noindex_detected,
3703
+ nofollow_detected: val.nofollow_detected,
3704
+ x_robots_tag: val.x_robots_tag,
3705
+ is_pdf: val.is_pdf,
3706
+ was_skipped: val.was_skipped,
3707
+ detected_charset: val.detected_charset,
3708
+ main_content_only: val.main_content_only,
3709
+ auth_header_sent: val.auth_header_sent,
3710
+ response_meta: val.response_meta.map(Into::into),
3711
+ assets: val.assets.into_iter().map(Into::into).collect(),
3712
+ js_render_hint: val.js_render_hint,
3713
+ browser_used: val.browser_used,
3714
+ markdown: val.markdown.map(Into::into),
3715
+ extracted_data: val.extracted_data.as_ref().map(ToString::to_string),
3716
+ extraction_meta: val.extraction_meta.map(Into::into),
3717
+ screenshot: val.screenshot.map(|v| v.to_vec()),
3718
+ downloaded_document: val.downloaded_document.map(Into::into),
3719
+ }
3720
+ }
3721
+ }
3722
+
3723
+ impl From<CrawlPageResult> for kreuzcrawl::CrawlPageResult {
3724
+ fn from(val: CrawlPageResult) -> Self {
3725
+ Self {
3726
+ url: val.url,
3727
+ normalized_url: val.normalized_url,
3728
+ status_code: val.status_code,
3729
+ content_type: val.content_type,
3730
+ html: val.html,
3731
+ body_size: val.body_size,
3732
+ metadata: val.metadata.into(),
3733
+ links: val.links.into_iter().map(Into::into).collect(),
3734
+ images: val.images.into_iter().map(Into::into).collect(),
3735
+ feeds: val.feeds.into_iter().map(Into::into).collect(),
3736
+ json_ld: val.json_ld.into_iter().map(Into::into).collect(),
3737
+ depth: val.depth,
3738
+ stayed_on_domain: val.stayed_on_domain,
3739
+ was_skipped: val.was_skipped,
3740
+ is_pdf: val.is_pdf,
3741
+ detected_charset: val.detected_charset,
3742
+ markdown: val.markdown.map(Into::into),
3743
+ extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
3744
+ extraction_meta: val.extraction_meta.map(Into::into),
3745
+ downloaded_document: val.downloaded_document.map(Into::into),
3746
+ }
3747
+ }
3748
+ }
3749
+
3750
+ impl From<kreuzcrawl::CrawlPageResult> for CrawlPageResult {
3751
+ fn from(val: kreuzcrawl::CrawlPageResult) -> Self {
3752
+ Self {
3753
+ url: val.url,
3754
+ normalized_url: val.normalized_url,
3755
+ status_code: val.status_code,
3756
+ content_type: val.content_type,
3757
+ html: val.html,
3758
+ body_size: val.body_size,
3759
+ metadata: val.metadata.into(),
3760
+ links: val.links.into_iter().map(Into::into).collect(),
3761
+ images: val.images.into_iter().map(Into::into).collect(),
3762
+ feeds: val.feeds.into_iter().map(Into::into).collect(),
3763
+ json_ld: val.json_ld.into_iter().map(Into::into).collect(),
3764
+ depth: val.depth,
3765
+ stayed_on_domain: val.stayed_on_domain,
3766
+ was_skipped: val.was_skipped,
3767
+ is_pdf: val.is_pdf,
3768
+ detected_charset: val.detected_charset,
3769
+ markdown: val.markdown.map(Into::into),
3770
+ extracted_data: val.extracted_data.as_ref().map(ToString::to_string),
3771
+ extraction_meta: val.extraction_meta.map(Into::into),
3772
+ downloaded_document: val.downloaded_document.map(Into::into),
3773
+ }
3774
+ }
3775
+ }
3776
+
3777
+ impl From<CrawlResult> for kreuzcrawl::CrawlResult {
3778
+ fn from(val: CrawlResult) -> Self {
3779
+ Self {
3780
+ pages: val.pages.into_iter().map(Into::into).collect(),
3781
+ final_url: val.final_url,
3782
+ redirect_count: val.redirect_count,
3783
+ was_skipped: val.was_skipped,
3784
+ error: val.error,
3785
+ cookies: val.cookies.into_iter().map(Into::into).collect(),
3786
+ normalized_urls: val.normalized_urls,
3787
+ }
3788
+ }
3789
+ }
3790
+
3791
+ impl From<kreuzcrawl::CrawlResult> for CrawlResult {
3792
+ fn from(val: kreuzcrawl::CrawlResult) -> Self {
3793
+ Self {
3794
+ pages: val.pages.into_iter().map(Into::into).collect(),
3795
+ final_url: val.final_url,
3796
+ redirect_count: val.redirect_count,
3797
+ was_skipped: val.was_skipped,
3798
+ error: val.error,
3799
+ cookies: val.cookies.into_iter().map(Into::into).collect(),
3800
+ normalized_urls: val.normalized_urls,
3801
+ }
3802
+ }
3803
+ }
3804
+
3805
+ impl From<SitemapUrl> for kreuzcrawl::SitemapUrl {
3806
+ fn from(val: SitemapUrl) -> Self {
3807
+ Self {
3808
+ url: val.url,
3809
+ lastmod: val.lastmod,
3810
+ changefreq: val.changefreq,
3811
+ priority: val.priority,
3812
+ }
3813
+ }
3814
+ }
3815
+
3816
+ impl From<kreuzcrawl::SitemapUrl> for SitemapUrl {
3817
+ fn from(val: kreuzcrawl::SitemapUrl) -> Self {
3818
+ Self {
3819
+ url: val.url,
3820
+ lastmod: val.lastmod,
3821
+ changefreq: val.changefreq,
3822
+ priority: val.priority,
3823
+ }
3824
+ }
3825
+ }
3826
+
3827
+ impl From<MapResult> for kreuzcrawl::MapResult {
3828
+ fn from(val: MapResult) -> Self {
3829
+ Self {
3830
+ urls: val.urls.into_iter().map(Into::into).collect(),
3831
+ }
3832
+ }
3833
+ }
3834
+
3835
+ impl From<kreuzcrawl::MapResult> for MapResult {
3836
+ fn from(val: kreuzcrawl::MapResult) -> Self {
3837
+ Self {
3838
+ urls: val.urls.into_iter().map(Into::into).collect(),
3839
+ }
3840
+ }
3841
+ }
3842
+
3843
+ impl From<MarkdownResult> for kreuzcrawl::MarkdownResult {
3844
+ fn from(val: MarkdownResult) -> Self {
3845
+ Self {
3846
+ content: val.content,
3847
+ document_structure: val
3848
+ .document_structure
3849
+ .as_ref()
3850
+ .and_then(|s| serde_json::from_str(s).ok()),
3851
+ tables: val
3852
+ .tables
3853
+ .into_iter()
3854
+ .filter_map(|s| serde_json::from_str(&s).ok())
3855
+ .collect(),
3856
+ warnings: val.warnings,
3857
+ citations: val.citations.map(Into::into),
3858
+ fit_content: val.fit_content,
3859
+ }
3860
+ }
3861
+ }
3862
+
3863
+ impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
3864
+ fn from(val: kreuzcrawl::MarkdownResult) -> Self {
3865
+ Self {
3866
+ content: val.content,
3867
+ document_structure: val.document_structure.as_ref().map(ToString::to_string),
3868
+ tables: val.tables.iter().map(ToString::to_string).collect(),
3869
+ warnings: val.warnings,
3870
+ citations: val.citations.map(Into::into),
3871
+ fit_content: val.fit_content,
3872
+ }
3873
+ }
3874
+ }
3875
+
3876
+ impl From<kreuzcrawl::CachedPage> for CachedPage {
3877
+ fn from(val: kreuzcrawl::CachedPage) -> Self {
3878
+ Self {
3879
+ url: val.url,
3880
+ status_code: val.status_code,
3881
+ content_type: val.content_type,
3882
+ body: val.body,
3883
+ etag: val.etag,
3884
+ last_modified: val.last_modified,
3885
+ cached_at: val.cached_at,
3886
+ }
3887
+ }
3888
+ }
3889
+
3890
+ impl From<LinkInfo> for kreuzcrawl::LinkInfo {
3891
+ fn from(val: LinkInfo) -> Self {
3892
+ Self {
3893
+ url: val.url,
3894
+ text: val.text,
3895
+ link_type: val.link_type.into(),
3896
+ rel: val.rel,
3897
+ nofollow: val.nofollow,
3898
+ }
3899
+ }
3900
+ }
3901
+
3902
+ impl From<kreuzcrawl::LinkInfo> for LinkInfo {
3903
+ fn from(val: kreuzcrawl::LinkInfo) -> Self {
3904
+ Self {
3905
+ url: val.url,
3906
+ text: val.text,
3907
+ link_type: val.link_type.into(),
3908
+ rel: val.rel,
3909
+ nofollow: val.nofollow,
3910
+ }
3911
+ }
3912
+ }
3913
+
3914
+ impl From<ImageInfo> for kreuzcrawl::ImageInfo {
3915
+ fn from(val: ImageInfo) -> Self {
3916
+ Self {
3917
+ url: val.url,
3918
+ alt: val.alt,
3919
+ width: val.width,
3920
+ height: val.height,
3921
+ source: val.source.into(),
3922
+ }
3923
+ }
3924
+ }
3925
+
3926
+ impl From<kreuzcrawl::ImageInfo> for ImageInfo {
3927
+ fn from(val: kreuzcrawl::ImageInfo) -> Self {
3928
+ Self {
3929
+ url: val.url,
3930
+ alt: val.alt,
3931
+ width: val.width,
3932
+ height: val.height,
3933
+ source: val.source.into(),
3934
+ }
3935
+ }
3936
+ }
3937
+
3938
+ impl From<FeedInfo> for kreuzcrawl::FeedInfo {
3939
+ fn from(val: FeedInfo) -> Self {
3940
+ Self {
3941
+ url: val.url,
3942
+ title: val.title,
3943
+ feed_type: val.feed_type.into(),
3944
+ }
3945
+ }
3946
+ }
3947
+
3948
+ impl From<kreuzcrawl::FeedInfo> for FeedInfo {
3949
+ fn from(val: kreuzcrawl::FeedInfo) -> Self {
3950
+ Self {
3951
+ url: val.url,
3952
+ title: val.title,
3953
+ feed_type: val.feed_type.into(),
3954
+ }
3955
+ }
3956
+ }
3957
+
3958
+ impl From<JsonLdEntry> for kreuzcrawl::JsonLdEntry {
3959
+ fn from(val: JsonLdEntry) -> Self {
3960
+ Self {
3961
+ schema_type: val.schema_type,
3962
+ name: val.name,
3963
+ raw: val.raw,
3964
+ }
3965
+ }
3966
+ }
3967
+
3968
+ impl From<kreuzcrawl::JsonLdEntry> for JsonLdEntry {
3969
+ fn from(val: kreuzcrawl::JsonLdEntry) -> Self {
3970
+ Self {
3971
+ schema_type: val.schema_type,
3972
+ name: val.name,
3973
+ raw: val.raw,
3974
+ }
3975
+ }
3976
+ }
3977
+
3978
+ impl From<CookieInfo> for kreuzcrawl::CookieInfo {
3979
+ fn from(val: CookieInfo) -> Self {
3980
+ Self {
3981
+ name: val.name,
3982
+ value: val.value,
3983
+ domain: val.domain,
3984
+ path: val.path,
3985
+ }
3986
+ }
3987
+ }
3988
+
3989
+ impl From<kreuzcrawl::CookieInfo> for CookieInfo {
3990
+ fn from(val: kreuzcrawl::CookieInfo) -> Self {
3991
+ Self {
3992
+ name: val.name,
3993
+ value: val.value,
3994
+ domain: val.domain,
3995
+ path: val.path,
3996
+ }
3997
+ }
3998
+ }
3999
+
4000
+ impl From<DownloadedAsset> for kreuzcrawl::DownloadedAsset {
4001
+ fn from(val: DownloadedAsset) -> Self {
4002
+ Self {
4003
+ url: val.url,
4004
+ content_hash: val.content_hash,
4005
+ mime_type: val.mime_type,
4006
+ size: val.size,
4007
+ asset_category: val.asset_category.into(),
4008
+ html_tag: val.html_tag,
4009
+ }
4010
+ }
4011
+ }
4012
+
4013
+ impl From<kreuzcrawl::DownloadedAsset> for DownloadedAsset {
4014
+ fn from(val: kreuzcrawl::DownloadedAsset) -> Self {
4015
+ Self {
4016
+ url: val.url,
4017
+ content_hash: val.content_hash,
4018
+ mime_type: val.mime_type,
4019
+ size: val.size,
4020
+ asset_category: val.asset_category.into(),
4021
+ html_tag: val.html_tag,
4022
+ }
4023
+ }
4024
+ }
4025
+
4026
+ impl From<ArticleMetadata> for kreuzcrawl::ArticleMetadata {
4027
+ fn from(val: ArticleMetadata) -> Self {
4028
+ Self {
4029
+ published_time: val.published_time,
4030
+ modified_time: val.modified_time,
4031
+ author: val.author,
4032
+ section: val.section,
4033
+ tags: val.tags,
4034
+ }
4035
+ }
4036
+ }
4037
+
4038
+ impl From<kreuzcrawl::ArticleMetadata> for ArticleMetadata {
4039
+ fn from(val: kreuzcrawl::ArticleMetadata) -> Self {
4040
+ Self {
4041
+ published_time: val.published_time,
4042
+ modified_time: val.modified_time,
4043
+ author: val.author,
4044
+ section: val.section,
4045
+ tags: val.tags,
4046
+ }
4047
+ }
4048
+ }
4049
+
4050
+ impl From<HreflangEntry> for kreuzcrawl::HreflangEntry {
4051
+ fn from(val: HreflangEntry) -> Self {
4052
+ Self {
4053
+ lang: val.lang,
4054
+ url: val.url,
4055
+ }
4056
+ }
4057
+ }
4058
+
4059
+ impl From<kreuzcrawl::HreflangEntry> for HreflangEntry {
4060
+ fn from(val: kreuzcrawl::HreflangEntry) -> Self {
4061
+ Self {
4062
+ lang: val.lang,
4063
+ url: val.url,
4064
+ }
4065
+ }
4066
+ }
4067
+
4068
+ impl From<FaviconInfo> for kreuzcrawl::FaviconInfo {
4069
+ fn from(val: FaviconInfo) -> Self {
4070
+ Self {
4071
+ url: val.url,
4072
+ rel: val.rel,
4073
+ sizes: val.sizes,
4074
+ mime_type: val.mime_type,
4075
+ }
4076
+ }
4077
+ }
4078
+
4079
+ impl From<kreuzcrawl::FaviconInfo> for FaviconInfo {
4080
+ fn from(val: kreuzcrawl::FaviconInfo) -> Self {
4081
+ Self {
4082
+ url: val.url,
4083
+ rel: val.rel,
4084
+ sizes: val.sizes,
4085
+ mime_type: val.mime_type,
4086
+ }
4087
+ }
4088
+ }
4089
+
4090
+ impl From<HeadingInfo> for kreuzcrawl::HeadingInfo {
4091
+ fn from(val: HeadingInfo) -> Self {
4092
+ Self {
4093
+ level: val.level,
4094
+ text: val.text,
4095
+ }
4096
+ }
4097
+ }
4098
+
4099
+ impl From<kreuzcrawl::HeadingInfo> for HeadingInfo {
4100
+ fn from(val: kreuzcrawl::HeadingInfo) -> Self {
4101
+ Self {
4102
+ level: val.level,
4103
+ text: val.text,
4104
+ }
4105
+ }
4106
+ }
4107
+
4108
+ impl From<ResponseMeta> for kreuzcrawl::ResponseMeta {
4109
+ fn from(val: ResponseMeta) -> Self {
4110
+ Self {
4111
+ etag: val.etag,
4112
+ last_modified: val.last_modified,
4113
+ cache_control: val.cache_control,
4114
+ server: val.server,
4115
+ x_powered_by: val.x_powered_by,
4116
+ content_language: val.content_language,
4117
+ content_encoding: val.content_encoding,
4118
+ }
4119
+ }
4120
+ }
4121
+
4122
+ impl From<kreuzcrawl::ResponseMeta> for ResponseMeta {
4123
+ fn from(val: kreuzcrawl::ResponseMeta) -> Self {
4124
+ Self {
4125
+ etag: val.etag,
4126
+ last_modified: val.last_modified,
4127
+ cache_control: val.cache_control,
4128
+ server: val.server,
4129
+ x_powered_by: val.x_powered_by,
4130
+ content_language: val.content_language,
4131
+ content_encoding: val.content_encoding,
4132
+ }
4133
+ }
4134
+ }
4135
+
4136
+ impl From<PageMetadata> for kreuzcrawl::PageMetadata {
4137
+ fn from(val: PageMetadata) -> Self {
4138
+ Self {
4139
+ title: val.title,
4140
+ description: val.description,
4141
+ canonical_url: val.canonical_url,
4142
+ keywords: val.keywords,
4143
+ author: val.author,
4144
+ viewport: val.viewport,
4145
+ theme_color: val.theme_color,
4146
+ generator: val.generator,
4147
+ robots: val.robots,
4148
+ html_lang: val.html_lang,
4149
+ html_dir: val.html_dir,
4150
+ og_title: val.og_title,
4151
+ og_type: val.og_type,
4152
+ og_image: val.og_image,
4153
+ og_description: val.og_description,
4154
+ og_url: val.og_url,
4155
+ og_site_name: val.og_site_name,
4156
+ og_locale: val.og_locale,
4157
+ og_video: val.og_video,
4158
+ og_audio: val.og_audio,
4159
+ og_locale_alternates: val.og_locale_alternates,
4160
+ twitter_card: val.twitter_card,
4161
+ twitter_title: val.twitter_title,
4162
+ twitter_description: val.twitter_description,
4163
+ twitter_image: val.twitter_image,
4164
+ twitter_site: val.twitter_site,
4165
+ twitter_creator: val.twitter_creator,
4166
+ dc_title: val.dc_title,
4167
+ dc_creator: val.dc_creator,
4168
+ dc_subject: val.dc_subject,
4169
+ dc_description: val.dc_description,
4170
+ dc_publisher: val.dc_publisher,
4171
+ dc_date: val.dc_date,
4172
+ dc_type: val.dc_type,
4173
+ dc_format: val.dc_format,
4174
+ dc_identifier: val.dc_identifier,
4175
+ dc_language: val.dc_language,
4176
+ dc_rights: val.dc_rights,
4177
+ article: val.article.map(Into::into),
4178
+ hreflangs: val.hreflangs.map(|v| v.into_iter().map(Into::into).collect()),
4179
+ favicons: val.favicons.map(|v| v.into_iter().map(Into::into).collect()),
4180
+ headings: val.headings.map(|v| v.into_iter().map(Into::into).collect()),
4181
+ word_count: val.word_count,
4182
+ }
4183
+ }
4184
+ }
4185
+
4186
+ impl From<kreuzcrawl::PageMetadata> for PageMetadata {
4187
+ fn from(val: kreuzcrawl::PageMetadata) -> Self {
4188
+ Self {
4189
+ title: val.title,
4190
+ description: val.description,
4191
+ canonical_url: val.canonical_url,
4192
+ keywords: val.keywords,
4193
+ author: val.author,
4194
+ viewport: val.viewport,
4195
+ theme_color: val.theme_color,
4196
+ generator: val.generator,
4197
+ robots: val.robots,
4198
+ html_lang: val.html_lang,
4199
+ html_dir: val.html_dir,
4200
+ og_title: val.og_title,
4201
+ og_type: val.og_type,
4202
+ og_image: val.og_image,
4203
+ og_description: val.og_description,
4204
+ og_url: val.og_url,
4205
+ og_site_name: val.og_site_name,
4206
+ og_locale: val.og_locale,
4207
+ og_video: val.og_video,
4208
+ og_audio: val.og_audio,
4209
+ og_locale_alternates: val.og_locale_alternates,
4210
+ twitter_card: val.twitter_card,
4211
+ twitter_title: val.twitter_title,
4212
+ twitter_description: val.twitter_description,
4213
+ twitter_image: val.twitter_image,
4214
+ twitter_site: val.twitter_site,
4215
+ twitter_creator: val.twitter_creator,
4216
+ dc_title: val.dc_title,
4217
+ dc_creator: val.dc_creator,
4218
+ dc_subject: val.dc_subject,
4219
+ dc_description: val.dc_description,
4220
+ dc_publisher: val.dc_publisher,
4221
+ dc_date: val.dc_date,
4222
+ dc_type: val.dc_type,
4223
+ dc_format: val.dc_format,
4224
+ dc_identifier: val.dc_identifier,
4225
+ dc_language: val.dc_language,
4226
+ dc_rights: val.dc_rights,
4227
+ article: val.article.map(Into::into),
4228
+ hreflangs: val.hreflangs.map(|v| v.into_iter().map(Into::into).collect()),
4229
+ favicons: val.favicons.map(|v| v.into_iter().map(Into::into).collect()),
4230
+ headings: val.headings.map(|v| v.into_iter().map(Into::into).collect()),
4231
+ word_count: val.word_count,
4232
+ }
4233
+ }
4234
+ }
4235
+
4236
+ impl From<CitationResult> for kreuzcrawl::CitationResult {
4237
+ fn from(val: CitationResult) -> Self {
4238
+ Self {
4239
+ content: val.content,
4240
+ references: val.references.into_iter().map(Into::into).collect(),
4241
+ }
4242
+ }
4243
+ }
4244
+
4245
+ impl From<kreuzcrawl::CitationResult> for CitationResult {
4246
+ fn from(val: kreuzcrawl::CitationResult) -> Self {
4247
+ Self {
4248
+ content: val.content,
4249
+ references: val.references.into_iter().map(Into::into).collect(),
4250
+ }
4251
+ }
4252
+ }
4253
+
4254
+ impl From<CitationReference> for kreuzcrawl::CitationReference {
4255
+ fn from(val: CitationReference) -> Self {
4256
+ Self {
4257
+ index: val.index,
4258
+ url: val.url,
4259
+ text: val.text,
4260
+ }
4261
+ }
4262
+ }
4263
+
4264
+ impl From<kreuzcrawl::CitationReference> for CitationReference {
4265
+ fn from(val: kreuzcrawl::CitationReference) -> Self {
4266
+ Self {
4267
+ index: val.index,
4268
+ url: val.url,
4269
+ text: val.text,
4270
+ }
4271
+ }
4272
+ }
4273
+
4274
+ impl From<BatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
4275
+ fn from(val: BatchScrapeResult) -> Self {
4276
+ Self {
4277
+ url: val.url,
4278
+ result: val.result.map(Into::into),
4279
+ error: val.error,
4280
+ }
4281
+ }
4282
+ }
4283
+
4284
+ impl From<kreuzcrawl::BatchScrapeResult> for BatchScrapeResult {
4285
+ fn from(val: kreuzcrawl::BatchScrapeResult) -> Self {
4286
+ Self {
4287
+ url: val.url,
4288
+ result: val.result.map(Into::into),
4289
+ error: val.error,
4290
+ }
4291
+ }
4292
+ }
4293
+
4294
+ impl From<BatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
4295
+ fn from(val: BatchCrawlResult) -> Self {
4296
+ Self {
4297
+ url: val.url,
4298
+ result: val.result.map(Into::into),
4299
+ error: val.error,
4300
+ }
4301
+ }
4302
+ }
4303
+
4304
+ impl From<kreuzcrawl::BatchCrawlResult> for BatchCrawlResult {
4305
+ fn from(val: kreuzcrawl::BatchCrawlResult) -> Self {
4306
+ Self {
4307
+ url: val.url,
4308
+ result: val.result.map(Into::into),
4309
+ error: val.error,
4310
+ }
4311
+ }
4312
+ }
4313
+
4314
+ impl From<BrowserMode> for kreuzcrawl::BrowserMode {
4315
+ fn from(val: BrowserMode) -> Self {
4316
+ match val {
4317
+ BrowserMode::Auto => Self::Auto,
4318
+ BrowserMode::Always => Self::Always,
4319
+ BrowserMode::Never => Self::Never,
4320
+ }
4321
+ }
4322
+ }
4323
+
4324
+ impl From<kreuzcrawl::BrowserMode> for BrowserMode {
4325
+ fn from(val: kreuzcrawl::BrowserMode) -> Self {
4326
+ match val {
4327
+ kreuzcrawl::BrowserMode::Auto => Self::Auto,
4328
+ kreuzcrawl::BrowserMode::Always => Self::Always,
4329
+ kreuzcrawl::BrowserMode::Never => Self::Never,
4330
+ }
4331
+ }
4332
+ }
4333
+
4334
+ impl From<BrowserWait> for kreuzcrawl::BrowserWait {
4335
+ fn from(val: BrowserWait) -> Self {
4336
+ match val {
4337
+ BrowserWait::NetworkIdle => Self::NetworkIdle,
4338
+ BrowserWait::Selector => Self::Selector,
4339
+ BrowserWait::Fixed => Self::Fixed,
4340
+ }
4341
+ }
4342
+ }
4343
+
4344
+ impl From<kreuzcrawl::BrowserWait> for BrowserWait {
4345
+ fn from(val: kreuzcrawl::BrowserWait) -> Self {
4346
+ match val {
4347
+ kreuzcrawl::BrowserWait::NetworkIdle => Self::NetworkIdle,
4348
+ kreuzcrawl::BrowserWait::Selector => Self::Selector,
4349
+ kreuzcrawl::BrowserWait::Fixed => Self::Fixed,
4350
+ }
4351
+ }
4352
+ }
4353
+
4354
+ impl From<AuthConfig> for kreuzcrawl::AuthConfig {
4355
+ fn from(val: AuthConfig) -> Self {
4356
+ match val {
4357
+ AuthConfig::Basic { username, password } => Self::Basic { username, password },
4358
+ AuthConfig::Bearer { token } => Self::Bearer { token },
4359
+ AuthConfig::Header { name, value } => Self::Header { name, value },
4360
+ }
4361
+ }
4362
+ }
4363
+
4364
+ impl From<kreuzcrawl::AuthConfig> for AuthConfig {
4365
+ fn from(val: kreuzcrawl::AuthConfig) -> Self {
4366
+ match val {
4367
+ kreuzcrawl::AuthConfig::Basic { username, password } => Self::Basic { username, password },
4368
+ kreuzcrawl::AuthConfig::Bearer { token } => Self::Bearer { token },
4369
+ kreuzcrawl::AuthConfig::Header { name, value } => Self::Header { name, value },
4370
+ }
4371
+ }
4372
+ }
4373
+
4374
+ impl From<LinkType> for kreuzcrawl::LinkType {
4375
+ fn from(val: LinkType) -> Self {
4376
+ match val {
4377
+ LinkType::Internal => Self::Internal,
4378
+ LinkType::External => Self::External,
4379
+ LinkType::Anchor => Self::Anchor,
4380
+ LinkType::Document => Self::Document,
4381
+ }
4382
+ }
4383
+ }
4384
+
4385
+ impl From<kreuzcrawl::LinkType> for LinkType {
4386
+ fn from(val: kreuzcrawl::LinkType) -> Self {
4387
+ match val {
4388
+ kreuzcrawl::LinkType::Internal => Self::Internal,
4389
+ kreuzcrawl::LinkType::External => Self::External,
4390
+ kreuzcrawl::LinkType::Anchor => Self::Anchor,
4391
+ kreuzcrawl::LinkType::Document => Self::Document,
4392
+ }
4393
+ }
4394
+ }
4395
+
4396
+ impl From<ImageSource> for kreuzcrawl::ImageSource {
4397
+ fn from(val: ImageSource) -> Self {
4398
+ match val {
4399
+ ImageSource::Img => Self::Img,
4400
+ ImageSource::PictureSource => Self::PictureSource,
4401
+ ImageSource::OgImage => Self::OgImage,
4402
+ ImageSource::TwitterImage => Self::TwitterImage,
4403
+ }
4404
+ }
4405
+ }
4406
+
4407
+ impl From<kreuzcrawl::ImageSource> for ImageSource {
4408
+ fn from(val: kreuzcrawl::ImageSource) -> Self {
4409
+ match val {
4410
+ kreuzcrawl::ImageSource::Img => Self::Img,
4411
+ kreuzcrawl::ImageSource::PictureSource => Self::PictureSource,
4412
+ kreuzcrawl::ImageSource::OgImage => Self::OgImage,
4413
+ kreuzcrawl::ImageSource::TwitterImage => Self::TwitterImage,
4414
+ }
4415
+ }
4416
+ }
4417
+
4418
+ impl From<FeedType> for kreuzcrawl::FeedType {
4419
+ fn from(val: FeedType) -> Self {
4420
+ match val {
4421
+ FeedType::Rss => Self::Rss,
4422
+ FeedType::Atom => Self::Atom,
4423
+ FeedType::JsonFeed => Self::JsonFeed,
4424
+ }
4425
+ }
4426
+ }
4427
+
4428
+ impl From<kreuzcrawl::FeedType> for FeedType {
4429
+ fn from(val: kreuzcrawl::FeedType) -> Self {
4430
+ match val {
4431
+ kreuzcrawl::FeedType::Rss => Self::Rss,
4432
+ kreuzcrawl::FeedType::Atom => Self::Atom,
4433
+ kreuzcrawl::FeedType::JsonFeed => Self::JsonFeed,
4434
+ }
4435
+ }
4436
+ }
4437
+
4438
+ impl From<AssetCategory> for kreuzcrawl::AssetCategory {
4439
+ fn from(val: AssetCategory) -> Self {
4440
+ match val {
4441
+ AssetCategory::Document => Self::Document,
4442
+ AssetCategory::Image => Self::Image,
4443
+ AssetCategory::Audio => Self::Audio,
4444
+ AssetCategory::Video => Self::Video,
4445
+ AssetCategory::Font => Self::Font,
4446
+ AssetCategory::Stylesheet => Self::Stylesheet,
4447
+ AssetCategory::Script => Self::Script,
4448
+ AssetCategory::Archive => Self::Archive,
4449
+ AssetCategory::Data => Self::Data,
4450
+ AssetCategory::Other => Self::Other,
4451
+ }
4452
+ }
4453
+ }
4454
+
4455
+ impl From<kreuzcrawl::AssetCategory> for AssetCategory {
4456
+ fn from(val: kreuzcrawl::AssetCategory) -> Self {
4457
+ match val {
4458
+ kreuzcrawl::AssetCategory::Document => Self::Document,
4459
+ kreuzcrawl::AssetCategory::Image => Self::Image,
4460
+ kreuzcrawl::AssetCategory::Audio => Self::Audio,
4461
+ kreuzcrawl::AssetCategory::Video => Self::Video,
4462
+ kreuzcrawl::AssetCategory::Font => Self::Font,
4463
+ kreuzcrawl::AssetCategory::Stylesheet => Self::Stylesheet,
4464
+ kreuzcrawl::AssetCategory::Script => Self::Script,
4465
+ kreuzcrawl::AssetCategory::Archive => Self::Archive,
4466
+ kreuzcrawl::AssetCategory::Data => Self::Data,
4467
+ kreuzcrawl::AssetCategory::Other => Self::Other,
4468
+ }
4469
+ }
4470
+ }
4471
+
4472
+ impl From<kreuzcrawl::CrawlEvent> for CrawlEvent {
4473
+ fn from(val: kreuzcrawl::CrawlEvent) -> Self {
4474
+ match val {
4475
+ kreuzcrawl::CrawlEvent::Page(_0) => Self::Page { _0: (*_0).into() },
4476
+ kreuzcrawl::CrawlEvent::Error { url, error } => Self::Error { url, error },
4477
+ kreuzcrawl::CrawlEvent::Complete { pages_crawled } => Self::Complete { pages_crawled },
4478
+ }
4479
+ }
4480
+ }
4481
+
4482
+ /// Convert a `kreuzcrawl::CrawlError` error to a Magnus runtime error.
4483
+ #[allow(dead_code)]
4484
+ fn crawl_error_to_magnus_err(e: kreuzcrawl::CrawlError) -> magnus::Error {
4485
+ let msg = e.to_string();
4486
+ magnus::Error::new(unsafe { magnus::Ruby::get_unchecked() }.exception_runtime_error(), msg)
4487
+ }
4488
+
4489
+ #[magnus::init]
4490
+ fn init(ruby: &Ruby) -> Result<(), Error> {
4491
+ let module = ruby.define_module("Kreuzcrawl")?;
4492
+
4493
+ let class = module.define_class("ExtractionMeta", ruby.class_object())?;
4494
+ class.define_singleton_method("new", function!(ExtractionMeta::new, 5))?;
4495
+ class.define_method("cost", method!(ExtractionMeta::cost, 0))?;
4496
+ class.define_method("prompt_tokens", method!(ExtractionMeta::prompt_tokens, 0))?;
4497
+ class.define_method("completion_tokens", method!(ExtractionMeta::completion_tokens, 0))?;
4498
+ class.define_method("model", method!(ExtractionMeta::model, 0))?;
4499
+ class.define_method("chunks_processed", method!(ExtractionMeta::chunks_processed, 0))?;
4500
+
4501
+ let class = module.define_class("ProxyConfig", ruby.class_object())?;
4502
+ class.define_singleton_method("new", function!(ProxyConfig::new, 3))?;
4503
+ class.define_method("url", method!(ProxyConfig::url, 0))?;
4504
+ class.define_method("username", method!(ProxyConfig::username, 0))?;
4505
+ class.define_method("password", method!(ProxyConfig::password, 0))?;
4506
+
4507
+ let class = module.define_class("BrowserConfig", ruby.class_object())?;
4508
+ class.define_singleton_method("new", function!(BrowserConfig::new, 6))?;
4509
+ class.define_method("mode", method!(BrowserConfig::mode, 0))?;
4510
+ class.define_method("endpoint", method!(BrowserConfig::endpoint, 0))?;
4511
+ class.define_method("timeout", method!(BrowserConfig::timeout, 0))?;
4512
+ class.define_method("wait", method!(BrowserConfig::wait, 0))?;
4513
+ class.define_method("wait_selector", method!(BrowserConfig::wait_selector, 0))?;
4514
+ class.define_method("extra_wait", method!(BrowserConfig::extra_wait, 0))?;
4515
+
4516
+ let class = module.define_class("CrawlConfig", ruby.class_object())?;
4517
+ class.define_singleton_method("new", function!(CrawlConfig::new, 1))?;
4518
+ class.define_method("max_depth", method!(CrawlConfig::max_depth, 0))?;
4519
+ class.define_method("max_pages", method!(CrawlConfig::max_pages, 0))?;
4520
+ class.define_method("max_concurrent", method!(CrawlConfig::max_concurrent, 0))?;
4521
+ class.define_method("respect_robots_txt", method!(CrawlConfig::respect_robots_txt, 0))?;
4522
+ class.define_method("user_agent", method!(CrawlConfig::user_agent, 0))?;
4523
+ class.define_method("stay_on_domain", method!(CrawlConfig::stay_on_domain, 0))?;
4524
+ class.define_method("allow_subdomains", method!(CrawlConfig::allow_subdomains, 0))?;
4525
+ class.define_method("include_paths", method!(CrawlConfig::include_paths, 0))?;
4526
+ class.define_method("exclude_paths", method!(CrawlConfig::exclude_paths, 0))?;
4527
+ class.define_method("custom_headers", method!(CrawlConfig::custom_headers, 0))?;
4528
+ class.define_method("request_timeout", method!(CrawlConfig::request_timeout, 0))?;
4529
+ class.define_method("max_redirects", method!(CrawlConfig::max_redirects, 0))?;
4530
+ class.define_method("retry_count", method!(CrawlConfig::retry_count, 0))?;
4531
+ class.define_method("retry_codes", method!(CrawlConfig::retry_codes, 0))?;
4532
+ class.define_method("cookies_enabled", method!(CrawlConfig::cookies_enabled, 0))?;
4533
+ class.define_method("auth", method!(CrawlConfig::auth, 0))?;
4534
+ class.define_method("max_body_size", method!(CrawlConfig::max_body_size, 0))?;
4535
+ class.define_method("main_content_only", method!(CrawlConfig::main_content_only, 0))?;
4536
+ class.define_method("remove_tags", method!(CrawlConfig::remove_tags, 0))?;
4537
+ class.define_method("map_limit", method!(CrawlConfig::map_limit, 0))?;
4538
+ class.define_method("map_search", method!(CrawlConfig::map_search, 0))?;
4539
+ class.define_method("download_assets", method!(CrawlConfig::download_assets, 0))?;
4540
+ class.define_method("asset_types", method!(CrawlConfig::asset_types, 0))?;
4541
+ class.define_method("max_asset_size", method!(CrawlConfig::max_asset_size, 0))?;
4542
+ class.define_method("browser", method!(CrawlConfig::browser, 0))?;
4543
+ class.define_method("proxy", method!(CrawlConfig::proxy, 0))?;
4544
+ class.define_method("user_agents", method!(CrawlConfig::user_agents, 0))?;
4545
+ class.define_method("capture_screenshot", method!(CrawlConfig::capture_screenshot, 0))?;
4546
+ class.define_method("download_documents", method!(CrawlConfig::download_documents, 0))?;
4547
+ class.define_method("document_max_size", method!(CrawlConfig::document_max_size, 0))?;
4548
+ class.define_method("document_mime_types", method!(CrawlConfig::document_mime_types, 0))?;
4549
+ class.define_method("warc_output", method!(CrawlConfig::warc_output, 0))?;
4550
+ class.define_method("browser_profile", method!(CrawlConfig::browser_profile, 0))?;
4551
+ class.define_method("save_browser_profile", method!(CrawlConfig::save_browser_profile, 0))?;
4552
+
4553
+ let class = module.define_class("DownloadedDocument", ruby.class_object())?;
4554
+ class.define_singleton_method("new", function!(DownloadedDocument::new, 7))?;
4555
+ class.define_method("url", method!(DownloadedDocument::url, 0))?;
4556
+ class.define_method("mime_type", method!(DownloadedDocument::mime_type, 0))?;
4557
+ class.define_method("content", method!(DownloadedDocument::content, 0))?;
4558
+ class.define_method("size", method!(DownloadedDocument::size, 0))?;
4559
+ class.define_method("filename", method!(DownloadedDocument::filename, 0))?;
4560
+ class.define_method("content_hash", method!(DownloadedDocument::content_hash, 0))?;
4561
+ class.define_method("headers", method!(DownloadedDocument::headers, 0))?;
4562
+
4563
+ let class = module.define_class("InteractionResult", ruby.class_object())?;
4564
+ class.define_singleton_method("new", function!(InteractionResult::new, 4))?;
4565
+ class.define_method("action_results", method!(InteractionResult::action_results, 0))?;
4566
+ class.define_method("final_html", method!(InteractionResult::final_html, 0))?;
4567
+ class.define_method("final_url", method!(InteractionResult::final_url, 0))?;
4568
+ class.define_method("screenshot", method!(InteractionResult::screenshot, 0))?;
4569
+
4570
+ let class = module.define_class("ActionResult", ruby.class_object())?;
4571
+ class.define_singleton_method("new", function!(ActionResult::new, 5))?;
4572
+ class.define_method("action_index", method!(ActionResult::action_index, 0))?;
4573
+ class.define_method("action_type", method!(ActionResult::action_type, 0))?;
4574
+ class.define_method("success", method!(ActionResult::success, 0))?;
4575
+ class.define_method("data", method!(ActionResult::data, 0))?;
4576
+ class.define_method("error", method!(ActionResult::error, 0))?;
4577
+
4578
+ let class = module.define_class("ScrapeResult", ruby.class_object())?;
4579
+ class.define_singleton_method("new", function!(ScrapeResult::new, 1))?;
4580
+ class.define_method("status_code", method!(ScrapeResult::status_code, 0))?;
4581
+ class.define_method("content_type", method!(ScrapeResult::content_type, 0))?;
4582
+ class.define_method("html", method!(ScrapeResult::html, 0))?;
4583
+ class.define_method("body_size", method!(ScrapeResult::body_size, 0))?;
4584
+ class.define_method("metadata", method!(ScrapeResult::metadata, 0))?;
4585
+ class.define_method("links", method!(ScrapeResult::links, 0))?;
4586
+ class.define_method("images", method!(ScrapeResult::images, 0))?;
4587
+ class.define_method("feeds", method!(ScrapeResult::feeds, 0))?;
4588
+ class.define_method("json_ld", method!(ScrapeResult::json_ld, 0))?;
4589
+ class.define_method("is_allowed", method!(ScrapeResult::is_allowed, 0))?;
4590
+ class.define_method("crawl_delay", method!(ScrapeResult::crawl_delay, 0))?;
4591
+ class.define_method("noindex_detected", method!(ScrapeResult::noindex_detected, 0))?;
4592
+ class.define_method("nofollow_detected", method!(ScrapeResult::nofollow_detected, 0))?;
4593
+ class.define_method("x_robots_tag", method!(ScrapeResult::x_robots_tag, 0))?;
4594
+ class.define_method("is_pdf", method!(ScrapeResult::is_pdf, 0))?;
4595
+ class.define_method("was_skipped", method!(ScrapeResult::was_skipped, 0))?;
4596
+ class.define_method("detected_charset", method!(ScrapeResult::detected_charset, 0))?;
4597
+ class.define_method("main_content_only", method!(ScrapeResult::main_content_only, 0))?;
4598
+ class.define_method("auth_header_sent", method!(ScrapeResult::auth_header_sent, 0))?;
4599
+ class.define_method("response_meta", method!(ScrapeResult::response_meta, 0))?;
4600
+ class.define_method("assets", method!(ScrapeResult::assets, 0))?;
4601
+ class.define_method("js_render_hint", method!(ScrapeResult::js_render_hint, 0))?;
4602
+ class.define_method("browser_used", method!(ScrapeResult::browser_used, 0))?;
4603
+ class.define_method("markdown", method!(ScrapeResult::markdown, 0))?;
4604
+ class.define_method("extracted_data", method!(ScrapeResult::extracted_data, 0))?;
4605
+ class.define_method("extraction_meta", method!(ScrapeResult::extraction_meta, 0))?;
4606
+ class.define_method("screenshot", method!(ScrapeResult::screenshot, 0))?;
4607
+ class.define_method("downloaded_document", method!(ScrapeResult::downloaded_document, 0))?;
4608
+
4609
+ let class = module.define_class("CrawlPageResult", ruby.class_object())?;
4610
+ class.define_singleton_method("new", function!(CrawlPageResult::new, 1))?;
4611
+ class.define_method("url", method!(CrawlPageResult::url, 0))?;
4612
+ class.define_method("normalized_url", method!(CrawlPageResult::normalized_url, 0))?;
4613
+ class.define_method("status_code", method!(CrawlPageResult::status_code, 0))?;
4614
+ class.define_method("content_type", method!(CrawlPageResult::content_type, 0))?;
4615
+ class.define_method("html", method!(CrawlPageResult::html, 0))?;
4616
+ class.define_method("body_size", method!(CrawlPageResult::body_size, 0))?;
4617
+ class.define_method("metadata", method!(CrawlPageResult::metadata, 0))?;
4618
+ class.define_method("links", method!(CrawlPageResult::links, 0))?;
4619
+ class.define_method("images", method!(CrawlPageResult::images, 0))?;
4620
+ class.define_method("feeds", method!(CrawlPageResult::feeds, 0))?;
4621
+ class.define_method("json_ld", method!(CrawlPageResult::json_ld, 0))?;
4622
+ class.define_method("depth", method!(CrawlPageResult::depth, 0))?;
4623
+ class.define_method("stayed_on_domain", method!(CrawlPageResult::stayed_on_domain, 0))?;
4624
+ class.define_method("was_skipped", method!(CrawlPageResult::was_skipped, 0))?;
4625
+ class.define_method("is_pdf", method!(CrawlPageResult::is_pdf, 0))?;
4626
+ class.define_method("detected_charset", method!(CrawlPageResult::detected_charset, 0))?;
4627
+ class.define_method("markdown", method!(CrawlPageResult::markdown, 0))?;
4628
+ class.define_method("extracted_data", method!(CrawlPageResult::extracted_data, 0))?;
4629
+ class.define_method("extraction_meta", method!(CrawlPageResult::extraction_meta, 0))?;
4630
+ class.define_method("downloaded_document", method!(CrawlPageResult::downloaded_document, 0))?;
4631
+
4632
+ let class = module.define_class("CrawlResult", ruby.class_object())?;
4633
+ class.define_singleton_method("new", function!(CrawlResult::new, 7))?;
4634
+ class.define_method("pages", method!(CrawlResult::pages, 0))?;
4635
+ class.define_method("final_url", method!(CrawlResult::final_url, 0))?;
4636
+ class.define_method("redirect_count", method!(CrawlResult::redirect_count, 0))?;
4637
+ class.define_method("was_skipped", method!(CrawlResult::was_skipped, 0))?;
4638
+ class.define_method("error", method!(CrawlResult::error, 0))?;
4639
+ class.define_method("cookies", method!(CrawlResult::cookies, 0))?;
4640
+ class.define_method("normalized_urls", method!(CrawlResult::normalized_urls, 0))?;
4641
+
4642
+ let class = module.define_class("SitemapUrl", ruby.class_object())?;
4643
+ class.define_singleton_method("new", function!(SitemapUrl::new, 4))?;
4644
+ class.define_method("url", method!(SitemapUrl::url, 0))?;
4645
+ class.define_method("lastmod", method!(SitemapUrl::lastmod, 0))?;
4646
+ class.define_method("changefreq", method!(SitemapUrl::changefreq, 0))?;
4647
+ class.define_method("priority", method!(SitemapUrl::priority, 0))?;
4648
+
4649
+ let class = module.define_class("MapResult", ruby.class_object())?;
4650
+ class.define_singleton_method("new", function!(MapResult::new, 1))?;
4651
+ class.define_method("urls", method!(MapResult::urls, 0))?;
4652
+
4653
+ let class = module.define_class("MarkdownResult", ruby.class_object())?;
4654
+ class.define_singleton_method("new", function!(MarkdownResult::new, 6))?;
4655
+ class.define_method("content", method!(MarkdownResult::content, 0))?;
4656
+ class.define_method("document_structure", method!(MarkdownResult::document_structure, 0))?;
4657
+ class.define_method("tables", method!(MarkdownResult::tables, 0))?;
4658
+ class.define_method("warnings", method!(MarkdownResult::warnings, 0))?;
4659
+ class.define_method("citations", method!(MarkdownResult::citations, 0))?;
4660
+ class.define_method("fit_content", method!(MarkdownResult::fit_content, 0))?;
4661
+
4662
+ let class = module.define_class("CachedPage", ruby.class_object())?;
4663
+ class.define_singleton_method("new", function!(CachedPage::new, 7))?;
4664
+ class.define_method("url", method!(CachedPage::url, 0))?;
4665
+ class.define_method("status_code", method!(CachedPage::status_code, 0))?;
4666
+ class.define_method("content_type", method!(CachedPage::content_type, 0))?;
4667
+ class.define_method("body", method!(CachedPage::body, 0))?;
4668
+ class.define_method("etag", method!(CachedPage::etag, 0))?;
4669
+ class.define_method("last_modified", method!(CachedPage::last_modified, 0))?;
4670
+ class.define_method("cached_at", method!(CachedPage::cached_at, 0))?;
4671
+
4672
+ let class = module.define_class("LinkInfo", ruby.class_object())?;
4673
+ class.define_singleton_method("new", function!(LinkInfo::new, 5))?;
4674
+ class.define_method("url", method!(LinkInfo::url, 0))?;
4675
+ class.define_method("text", method!(LinkInfo::text, 0))?;
4676
+ class.define_method("link_type", method!(LinkInfo::link_type, 0))?;
4677
+ class.define_method("rel", method!(LinkInfo::rel, 0))?;
4678
+ class.define_method("nofollow", method!(LinkInfo::nofollow, 0))?;
4679
+
4680
+ let class = module.define_class("ImageInfo", ruby.class_object())?;
4681
+ class.define_singleton_method("new", function!(ImageInfo::new, 5))?;
4682
+ class.define_method("url", method!(ImageInfo::url, 0))?;
4683
+ class.define_method("alt", method!(ImageInfo::alt, 0))?;
4684
+ class.define_method("width", method!(ImageInfo::width, 0))?;
4685
+ class.define_method("height", method!(ImageInfo::height, 0))?;
4686
+ class.define_method("source", method!(ImageInfo::source, 0))?;
4687
+
4688
+ let class = module.define_class("FeedInfo", ruby.class_object())?;
4689
+ class.define_singleton_method("new", function!(FeedInfo::new, 3))?;
4690
+ class.define_method("url", method!(FeedInfo::url, 0))?;
4691
+ class.define_method("title", method!(FeedInfo::title, 0))?;
4692
+ class.define_method("feed_type", method!(FeedInfo::feed_type, 0))?;
4693
+
4694
+ let class = module.define_class("JsonLdEntry", ruby.class_object())?;
4695
+ class.define_singleton_method("new", function!(JsonLdEntry::new, 3))?;
4696
+ class.define_method("schema_type", method!(JsonLdEntry::schema_type, 0))?;
4697
+ class.define_method("name", method!(JsonLdEntry::name, 0))?;
4698
+ class.define_method("raw", method!(JsonLdEntry::raw, 0))?;
4699
+
4700
+ let class = module.define_class("CookieInfo", ruby.class_object())?;
4701
+ class.define_singleton_method("new", function!(CookieInfo::new, 4))?;
4702
+ class.define_method("name", method!(CookieInfo::name, 0))?;
4703
+ class.define_method("value", method!(CookieInfo::value, 0))?;
4704
+ class.define_method("domain", method!(CookieInfo::domain, 0))?;
4705
+ class.define_method("path", method!(CookieInfo::path, 0))?;
4706
+
4707
+ let class = module.define_class("DownloadedAsset", ruby.class_object())?;
4708
+ class.define_singleton_method("new", function!(DownloadedAsset::new, 6))?;
4709
+ class.define_method("url", method!(DownloadedAsset::url, 0))?;
4710
+ class.define_method("content_hash", method!(DownloadedAsset::content_hash, 0))?;
4711
+ class.define_method("mime_type", method!(DownloadedAsset::mime_type, 0))?;
4712
+ class.define_method("size", method!(DownloadedAsset::size, 0))?;
4713
+ class.define_method("asset_category", method!(DownloadedAsset::asset_category, 0))?;
4714
+ class.define_method("html_tag", method!(DownloadedAsset::html_tag, 0))?;
4715
+
4716
+ let class = module.define_class("ArticleMetadata", ruby.class_object())?;
4717
+ class.define_singleton_method("new", function!(ArticleMetadata::new, 5))?;
4718
+ class.define_method("published_time", method!(ArticleMetadata::published_time, 0))?;
4719
+ class.define_method("modified_time", method!(ArticleMetadata::modified_time, 0))?;
4720
+ class.define_method("author", method!(ArticleMetadata::author, 0))?;
4721
+ class.define_method("section", method!(ArticleMetadata::section, 0))?;
4722
+ class.define_method("tags", method!(ArticleMetadata::tags, 0))?;
4723
+
4724
+ let class = module.define_class("HreflangEntry", ruby.class_object())?;
4725
+ class.define_singleton_method("new", function!(HreflangEntry::new, 2))?;
4726
+ class.define_method("lang", method!(HreflangEntry::lang, 0))?;
4727
+ class.define_method("url", method!(HreflangEntry::url, 0))?;
4728
+
4729
+ let class = module.define_class("FaviconInfo", ruby.class_object())?;
4730
+ class.define_singleton_method("new", function!(FaviconInfo::new, 4))?;
4731
+ class.define_method("url", method!(FaviconInfo::url, 0))?;
4732
+ class.define_method("rel", method!(FaviconInfo::rel, 0))?;
4733
+ class.define_method("sizes", method!(FaviconInfo::sizes, 0))?;
4734
+ class.define_method("mime_type", method!(FaviconInfo::mime_type, 0))?;
4735
+
4736
+ let class = module.define_class("HeadingInfo", ruby.class_object())?;
4737
+ class.define_singleton_method("new", function!(HeadingInfo::new, 2))?;
4738
+ class.define_method("level", method!(HeadingInfo::level, 0))?;
4739
+ class.define_method("text", method!(HeadingInfo::text, 0))?;
4740
+
4741
+ let class = module.define_class("ResponseMeta", ruby.class_object())?;
4742
+ class.define_singleton_method("new", function!(ResponseMeta::new, 7))?;
4743
+ class.define_method("etag", method!(ResponseMeta::etag, 0))?;
4744
+ class.define_method("last_modified", method!(ResponseMeta::last_modified, 0))?;
4745
+ class.define_method("cache_control", method!(ResponseMeta::cache_control, 0))?;
4746
+ class.define_method("server", method!(ResponseMeta::server, 0))?;
4747
+ class.define_method("x_powered_by", method!(ResponseMeta::x_powered_by, 0))?;
4748
+ class.define_method("content_language", method!(ResponseMeta::content_language, 0))?;
4749
+ class.define_method("content_encoding", method!(ResponseMeta::content_encoding, 0))?;
4750
+
4751
+ let class = module.define_class("PageMetadata", ruby.class_object())?;
4752
+ class.define_singleton_method("new", function!(PageMetadata::new, 1))?;
4753
+ class.define_method("title", method!(PageMetadata::title, 0))?;
4754
+ class.define_method("description", method!(PageMetadata::description, 0))?;
4755
+ class.define_method("canonical_url", method!(PageMetadata::canonical_url, 0))?;
4756
+ class.define_method("keywords", method!(PageMetadata::keywords, 0))?;
4757
+ class.define_method("author", method!(PageMetadata::author, 0))?;
4758
+ class.define_method("viewport", method!(PageMetadata::viewport, 0))?;
4759
+ class.define_method("theme_color", method!(PageMetadata::theme_color, 0))?;
4760
+ class.define_method("generator", method!(PageMetadata::generator, 0))?;
4761
+ class.define_method("robots", method!(PageMetadata::robots, 0))?;
4762
+ class.define_method("html_lang", method!(PageMetadata::html_lang, 0))?;
4763
+ class.define_method("html_dir", method!(PageMetadata::html_dir, 0))?;
4764
+ class.define_method("og_title", method!(PageMetadata::og_title, 0))?;
4765
+ class.define_method("og_type", method!(PageMetadata::og_type, 0))?;
4766
+ class.define_method("og_image", method!(PageMetadata::og_image, 0))?;
4767
+ class.define_method("og_description", method!(PageMetadata::og_description, 0))?;
4768
+ class.define_method("og_url", method!(PageMetadata::og_url, 0))?;
4769
+ class.define_method("og_site_name", method!(PageMetadata::og_site_name, 0))?;
4770
+ class.define_method("og_locale", method!(PageMetadata::og_locale, 0))?;
4771
+ class.define_method("og_video", method!(PageMetadata::og_video, 0))?;
4772
+ class.define_method("og_audio", method!(PageMetadata::og_audio, 0))?;
4773
+ class.define_method("og_locale_alternates", method!(PageMetadata::og_locale_alternates, 0))?;
4774
+ class.define_method("twitter_card", method!(PageMetadata::twitter_card, 0))?;
4775
+ class.define_method("twitter_title", method!(PageMetadata::twitter_title, 0))?;
4776
+ class.define_method("twitter_description", method!(PageMetadata::twitter_description, 0))?;
4777
+ class.define_method("twitter_image", method!(PageMetadata::twitter_image, 0))?;
4778
+ class.define_method("twitter_site", method!(PageMetadata::twitter_site, 0))?;
4779
+ class.define_method("twitter_creator", method!(PageMetadata::twitter_creator, 0))?;
4780
+ class.define_method("dc_title", method!(PageMetadata::dc_title, 0))?;
4781
+ class.define_method("dc_creator", method!(PageMetadata::dc_creator, 0))?;
4782
+ class.define_method("dc_subject", method!(PageMetadata::dc_subject, 0))?;
4783
+ class.define_method("dc_description", method!(PageMetadata::dc_description, 0))?;
4784
+ class.define_method("dc_publisher", method!(PageMetadata::dc_publisher, 0))?;
4785
+ class.define_method("dc_date", method!(PageMetadata::dc_date, 0))?;
4786
+ class.define_method("dc_type", method!(PageMetadata::dc_type, 0))?;
4787
+ class.define_method("dc_format", method!(PageMetadata::dc_format, 0))?;
4788
+ class.define_method("dc_identifier", method!(PageMetadata::dc_identifier, 0))?;
4789
+ class.define_method("dc_language", method!(PageMetadata::dc_language, 0))?;
4790
+ class.define_method("dc_rights", method!(PageMetadata::dc_rights, 0))?;
4791
+ class.define_method("article", method!(PageMetadata::article, 0))?;
4792
+ class.define_method("hreflangs", method!(PageMetadata::hreflangs, 0))?;
4793
+ class.define_method("favicons", method!(PageMetadata::favicons, 0))?;
4794
+ class.define_method("headings", method!(PageMetadata::headings, 0))?;
4795
+ class.define_method("word_count", method!(PageMetadata::word_count, 0))?;
4796
+
4797
+ let class = module.define_class("CitationResult", ruby.class_object())?;
4798
+ class.define_singleton_method("new", function!(CitationResult::new, 2))?;
4799
+ class.define_method("content", method!(CitationResult::content, 0))?;
4800
+ class.define_method("references", method!(CitationResult::references, 0))?;
4801
+
4802
+ let class = module.define_class("CitationReference", ruby.class_object())?;
4803
+ class.define_singleton_method("new", function!(CitationReference::new, 3))?;
4804
+ class.define_method("index", method!(CitationReference::index, 0))?;
4805
+ class.define_method("url", method!(CitationReference::url, 0))?;
4806
+ class.define_method("text", method!(CitationReference::text, 0))?;
4807
+
4808
+ let _class = module.define_class("CrawlEngineHandle", ruby.class_object())?;
4809
+
4810
+ let class = module.define_class("BatchScrapeResult", ruby.class_object())?;
4811
+ class.define_singleton_method("new", function!(BatchScrapeResult::new, 3))?;
4812
+ class.define_method("url", method!(BatchScrapeResult::url, 0))?;
4813
+ class.define_method("result", method!(BatchScrapeResult::result, 0))?;
4814
+ class.define_method("error", method!(BatchScrapeResult::error, 0))?;
4815
+
4816
+ let class = module.define_class("BatchCrawlResult", ruby.class_object())?;
4817
+ class.define_singleton_method("new", function!(BatchCrawlResult::new, 3))?;
4818
+ class.define_method("url", method!(BatchCrawlResult::url, 0))?;
4819
+ class.define_method("result", method!(BatchCrawlResult::result, 0))?;
4820
+ class.define_method("error", method!(BatchCrawlResult::error, 0))?;
4821
+
4822
+ module.define_module_function("create_engine", function!(create_engine, 1))?;
4823
+ module.define_module_function("scrape", function!(scrape, 2))?;
4824
+ module.define_module_function("scrape_async", function!(scrape_async, 2))?;
4825
+ module.define_module_function("crawl", function!(crawl, 2))?;
4826
+ module.define_module_function("crawl_async", function!(crawl_async, 2))?;
4827
+ module.define_module_function("map_urls", function!(map_urls, 2))?;
4828
+ module.define_module_function("map_urls_async", function!(map_urls_async, 2))?;
4829
+ module.define_module_function("batch_scrape", function!(batch_scrape, 2))?;
4830
+ module.define_module_function("batch_scrape_async", function!(batch_scrape_async, 2))?;
4831
+ module.define_module_function("batch_crawl", function!(batch_crawl, 2))?;
4832
+ module.define_module_function("batch_crawl_async", function!(batch_crawl_async, 2))?;
4833
+
4834
+ Ok(())
4835
+ }