kreuzcrawl 0.1.2 → 0.3.0.pre.rc.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/kreuzcrawl_rb/Cargo.toml +1 -1
- data/ext/kreuzcrawl_rb/src/kreuzcrawl/version.rb +6 -2
- data/ext/kreuzcrawl_rb/src/kreuzcrawl.rb +5 -1
- data/ext/kreuzcrawl_rb/src/lib.rs +255 -41
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 92258fdaf73e3d2a7c3d2973905388ea408deb41a035d5d5a35613924c9417da
|
|
4
|
+
data.tar.gz: a907f79b86c3511a7c572dfcf07fa342d19f1e2455c25f47a7ea75b2b543c171
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 86aa4b0c8f043de802d7d844167b351b0e140a46b2f2d04664e0710104e3c305b5e156a709aeb0303a534cb7a53818180015c6b82bebc3ce41f84f48ae0feded
|
|
7
|
+
data.tar.gz: 2d3747ecd2e4a410e93f894e1ca16a48ad881735eaed33d0989d8abfd4278cb55970cafa5c925e8ce3b0ef5b2b9d15bdb79827fe8451b1281f6ba9501f9f326b
|
|
@@ -1,6 +1,10 @@
|
|
|
1
|
-
# This file is auto-generated by alef
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:b59e800fdddf59213911a2309f9e365fa6993399f1c12fbe68bd27b269cff2d9
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
2
6
|
# frozen_string_literal: true
|
|
3
7
|
|
|
4
8
|
module Kreuzcrawl
|
|
5
|
-
VERSION = "0.1.2"
|
|
9
|
+
VERSION = "0.3.0-rc.2"
|
|
6
10
|
end
|
|
@@ -1,4 +1,8 @@
|
|
|
1
|
-
# This file is auto-generated by alef
|
|
1
|
+
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
+
# alef:hash:cb606b2a7daa26b35ced54ce0131bc649210d7c9396f1309bf155a45e52ef34e
|
|
3
|
+
# To regenerate: alef generate
|
|
4
|
+
# To verify freshness: alef verify --exit-code
|
|
5
|
+
# Issues & docs: https://github.com/kreuzberg-dev/alef
|
|
2
6
|
# frozen_string_literal: true
|
|
3
7
|
|
|
4
8
|
require_relative 'kreuzcrawl/version'
|
|
@@ -1,19 +1,7 @@
|
|
|
1
1
|
// This file is auto-generated by alef. DO NOT EDIT.
|
|
2
|
+
// alef:hash:38edd0f53a45bf9e74fc6f8570ccc68607d810942abb51abaedc20c4afa2ccda
|
|
2
3
|
// Re-generate with: alef generate
|
|
3
4
|
#![allow(dead_code)]
|
|
4
|
-
#![allow(
|
|
5
|
-
clippy::too_many_arguments,
|
|
6
|
-
clippy::let_unit_value,
|
|
7
|
-
clippy::needless_borrow,
|
|
8
|
-
clippy::map_identity,
|
|
9
|
-
clippy::just_underscores_and_digits,
|
|
10
|
-
clippy::unused_unit,
|
|
11
|
-
clippy::unnecessary_cast,
|
|
12
|
-
clippy::unwrap_or_default,
|
|
13
|
-
clippy::derivable_impls,
|
|
14
|
-
clippy::needless_borrows_for_generic_args,
|
|
15
|
-
clippy::unnecessary_fallible_conversions
|
|
16
|
-
)]
|
|
17
5
|
|
|
18
6
|
use magnus::{Error, IntoValueFromNative, Ruby, function, method, prelude::*, try_convert::TryConvertOwned};
|
|
19
7
|
use std::collections::HashMap;
|
|
@@ -175,6 +163,133 @@ impl ProxyConfig {
|
|
|
175
163
|
}
|
|
176
164
|
}
|
|
177
165
|
|
|
166
|
+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
167
|
+
#[magnus::wrap(class = "Kreuzcrawl::ContentConfig")]
|
|
168
|
+
#[serde(default)]
|
|
169
|
+
pub struct ContentConfig {
|
|
170
|
+
pub output_format: String,
|
|
171
|
+
pub preprocessing_preset: String,
|
|
172
|
+
pub remove_navigation: bool,
|
|
173
|
+
pub remove_forms: bool,
|
|
174
|
+
pub strip_tags: Vec<String>,
|
|
175
|
+
pub preserve_tags: Vec<String>,
|
|
176
|
+
pub exclude_selectors: Vec<String>,
|
|
177
|
+
pub skip_images: bool,
|
|
178
|
+
pub max_depth: Option<usize>,
|
|
179
|
+
pub wrap: bool,
|
|
180
|
+
pub wrap_width: usize,
|
|
181
|
+
pub include_document_structure: bool,
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
unsafe impl IntoValueFromNative for ContentConfig {}
|
|
185
|
+
|
|
186
|
+
impl magnus::TryConvert for ContentConfig {
|
|
187
|
+
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
188
|
+
let r: &ContentConfig = magnus::TryConvert::try_convert(val)?;
|
|
189
|
+
Ok(r.clone())
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
unsafe impl TryConvertOwned for ContentConfig {}
|
|
193
|
+
|
|
194
|
+
impl Default for ContentConfig {
|
|
195
|
+
fn default() -> Self {
|
|
196
|
+
Self {
|
|
197
|
+
output_format: Default::default(),
|
|
198
|
+
preprocessing_preset: Default::default(),
|
|
199
|
+
remove_navigation: Default::default(),
|
|
200
|
+
remove_forms: Default::default(),
|
|
201
|
+
strip_tags: Default::default(),
|
|
202
|
+
preserve_tags: Default::default(),
|
|
203
|
+
exclude_selectors: Default::default(),
|
|
204
|
+
skip_images: Default::default(),
|
|
205
|
+
max_depth: Default::default(),
|
|
206
|
+
wrap: Default::default(),
|
|
207
|
+
wrap_width: Default::default(),
|
|
208
|
+
include_document_structure: Default::default(),
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
impl ContentConfig {
|
|
214
|
+
fn new(
|
|
215
|
+
output_format: Option<String>,
|
|
216
|
+
preprocessing_preset: Option<String>,
|
|
217
|
+
remove_navigation: Option<bool>,
|
|
218
|
+
remove_forms: Option<bool>,
|
|
219
|
+
strip_tags: Option<Vec<String>>,
|
|
220
|
+
preserve_tags: Option<Vec<String>>,
|
|
221
|
+
exclude_selectors: Option<Vec<String>>,
|
|
222
|
+
skip_images: Option<bool>,
|
|
223
|
+
max_depth: Option<usize>,
|
|
224
|
+
wrap: Option<bool>,
|
|
225
|
+
wrap_width: Option<usize>,
|
|
226
|
+
include_document_structure: Option<bool>,
|
|
227
|
+
) -> Self {
|
|
228
|
+
Self {
|
|
229
|
+
output_format: output_format.unwrap_or("markdown".to_string()),
|
|
230
|
+
preprocessing_preset: preprocessing_preset.unwrap_or("standard".to_string()),
|
|
231
|
+
remove_navigation: remove_navigation.unwrap_or(true),
|
|
232
|
+
remove_forms: remove_forms.unwrap_or(true),
|
|
233
|
+
strip_tags: strip_tags.unwrap_or_default(),
|
|
234
|
+
preserve_tags: preserve_tags.unwrap_or_default(),
|
|
235
|
+
exclude_selectors: exclude_selectors.unwrap_or_default(),
|
|
236
|
+
skip_images: skip_images.unwrap_or(false),
|
|
237
|
+
max_depth,
|
|
238
|
+
wrap: wrap.unwrap_or(false),
|
|
239
|
+
wrap_width: wrap_width.unwrap_or(80),
|
|
240
|
+
include_document_structure: include_document_structure.unwrap_or(true),
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
fn output_format(&self) -> String {
|
|
245
|
+
self.output_format.clone()
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
fn preprocessing_preset(&self) -> String {
|
|
249
|
+
self.preprocessing_preset.clone()
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
fn remove_navigation(&self) -> bool {
|
|
253
|
+
self.remove_navigation
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
fn remove_forms(&self) -> bool {
|
|
257
|
+
self.remove_forms
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
fn strip_tags(&self) -> Vec<String> {
|
|
261
|
+
self.strip_tags.clone()
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
fn preserve_tags(&self) -> Vec<String> {
|
|
265
|
+
self.preserve_tags.clone()
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
fn exclude_selectors(&self) -> Vec<String> {
|
|
269
|
+
self.exclude_selectors.clone()
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
fn skip_images(&self) -> bool {
|
|
273
|
+
self.skip_images
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
fn max_depth(&self) -> Option<usize> {
|
|
277
|
+
self.max_depth
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
fn wrap(&self) -> bool {
|
|
281
|
+
self.wrap
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
fn wrap_width(&self) -> usize {
|
|
285
|
+
self.wrap_width
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
fn include_document_structure(&self) -> bool {
|
|
289
|
+
self.include_document_structure
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
|
|
178
293
|
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
|
|
179
294
|
#[magnus::wrap(class = "Kreuzcrawl::BrowserConfig")]
|
|
180
295
|
#[serde(default)]
|
|
@@ -263,8 +378,8 @@ pub struct CrawlConfig {
|
|
|
263
378
|
pub cookies_enabled: bool,
|
|
264
379
|
pub auth: Option<AuthConfig>,
|
|
265
380
|
pub max_body_size: Option<usize>,
|
|
266
|
-
pub main_content_only: bool,
|
|
267
381
|
pub remove_tags: Vec<String>,
|
|
382
|
+
pub content: ContentConfig,
|
|
268
383
|
pub map_limit: Option<usize>,
|
|
269
384
|
pub map_search: Option<String>,
|
|
270
385
|
pub download_assets: bool,
|
|
@@ -361,14 +476,14 @@ impl CrawlConfig {
|
|
|
361
476
|
max_body_size: kwargs
|
|
362
477
|
.get(ruby.to_symbol("max_body_size"))
|
|
363
478
|
.and_then(|v| usize::try_convert(v).ok()),
|
|
364
|
-
main_content_only: kwargs
|
|
365
|
-
.get(ruby.to_symbol("main_content_only"))
|
|
366
|
-
.and_then(|v| bool::try_convert(v).ok())
|
|
367
|
-
.unwrap_or(false),
|
|
368
479
|
remove_tags: kwargs
|
|
369
480
|
.get(ruby.to_symbol("remove_tags"))
|
|
370
481
|
.and_then(|v| <Vec<String>>::try_convert(v).ok())
|
|
371
482
|
.unwrap_or_default(),
|
|
483
|
+
content: kwargs
|
|
484
|
+
.get(ruby.to_symbol("content"))
|
|
485
|
+
.and_then(|v| ContentConfig::try_convert(v).ok())
|
|
486
|
+
.unwrap_or_default(),
|
|
372
487
|
map_limit: kwargs
|
|
373
488
|
.get(ruby.to_symbol("map_limit"))
|
|
374
489
|
.and_then(|v| usize::try_convert(v).ok()),
|
|
@@ -497,14 +612,14 @@ impl CrawlConfig {
|
|
|
497
612
|
self.max_body_size
|
|
498
613
|
}
|
|
499
614
|
|
|
500
|
-
fn main_content_only(&self) -> bool {
|
|
501
|
-
self.main_content_only
|
|
502
|
-
}
|
|
503
|
-
|
|
504
615
|
fn remove_tags(&self) -> Vec<String> {
|
|
505
616
|
self.remove_tags.clone()
|
|
506
617
|
}
|
|
507
618
|
|
|
619
|
+
fn content(&self) -> ContentConfig {
|
|
620
|
+
self.content.clone()
|
|
621
|
+
}
|
|
622
|
+
|
|
508
623
|
fn map_limit(&self) -> Option<usize> {
|
|
509
624
|
self.map_limit
|
|
510
625
|
}
|
|
@@ -586,8 +701,8 @@ impl CrawlConfig {
|
|
|
586
701
|
cookies_enabled: self.cookies_enabled,
|
|
587
702
|
auth: self.auth.clone().map(Into::into),
|
|
588
703
|
max_body_size: self.max_body_size,
|
|
589
|
-
main_content_only: self.main_content_only,
|
|
590
704
|
remove_tags: self.remove_tags.clone(),
|
|
705
|
+
content: self.content.clone().into(),
|
|
591
706
|
map_limit: self.map_limit,
|
|
592
707
|
map_search: self.map_search.clone(),
|
|
593
708
|
download_assets: self.download_assets,
|
|
@@ -723,7 +838,6 @@ pub struct ScrapeResult {
|
|
|
723
838
|
pub is_pdf: bool,
|
|
724
839
|
pub was_skipped: bool,
|
|
725
840
|
pub detected_charset: Option<String>,
|
|
726
|
-
pub main_content_only: bool,
|
|
727
841
|
pub auth_header_sent: bool,
|
|
728
842
|
pub response_meta: Option<ResponseMeta>,
|
|
729
843
|
pub assets: Vec<DownloadedAsset>,
|
|
@@ -766,7 +880,6 @@ impl Default for ScrapeResult {
|
|
|
766
880
|
is_pdf: Default::default(),
|
|
767
881
|
was_skipped: Default::default(),
|
|
768
882
|
detected_charset: Default::default(),
|
|
769
|
-
main_content_only: Default::default(),
|
|
770
883
|
auth_header_sent: Default::default(),
|
|
771
884
|
response_meta: Default::default(),
|
|
772
885
|
assets: Default::default(),
|
|
@@ -850,10 +963,6 @@ impl ScrapeResult {
|
|
|
850
963
|
detected_charset: kwargs
|
|
851
964
|
.get(ruby.to_symbol("detected_charset"))
|
|
852
965
|
.and_then(|v| String::try_convert(v).ok()),
|
|
853
|
-
main_content_only: kwargs
|
|
854
|
-
.get(ruby.to_symbol("main_content_only"))
|
|
855
|
-
.and_then(|v| bool::try_convert(v).ok())
|
|
856
|
-
.unwrap_or_default(),
|
|
857
966
|
auth_header_sent: kwargs
|
|
858
967
|
.get(ruby.to_symbol("auth_header_sent"))
|
|
859
968
|
.and_then(|v| bool::try_convert(v).ok())
|
|
@@ -959,10 +1068,6 @@ impl ScrapeResult {
|
|
|
959
1068
|
self.detected_charset.clone()
|
|
960
1069
|
}
|
|
961
1070
|
|
|
962
|
-
fn main_content_only(&self) -> bool {
|
|
963
|
-
self.main_content_only
|
|
964
|
-
}
|
|
965
|
-
|
|
966
1071
|
fn auth_header_sent(&self) -> bool {
|
|
967
1072
|
self.auth_header_sent
|
|
968
1073
|
}
|
|
@@ -3270,6 +3375,7 @@ fn batch_crawl_async(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec
|
|
|
3270
3375
|
Ok(result.into_iter().map(Into::into).collect())
|
|
3271
3376
|
}
|
|
3272
3377
|
|
|
3378
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3273
3379
|
impl From<ExtractionMeta> for kreuzcrawl::ExtractionMeta {
|
|
3274
3380
|
fn from(val: ExtractionMeta) -> Self {
|
|
3275
3381
|
Self {
|
|
@@ -3282,6 +3388,7 @@ impl From<ExtractionMeta> for kreuzcrawl::ExtractionMeta {
|
|
|
3282
3388
|
}
|
|
3283
3389
|
}
|
|
3284
3390
|
|
|
3391
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3285
3392
|
impl From<kreuzcrawl::ExtractionMeta> for ExtractionMeta {
|
|
3286
3393
|
fn from(val: kreuzcrawl::ExtractionMeta) -> Self {
|
|
3287
3394
|
Self {
|
|
@@ -3294,6 +3401,7 @@ impl From<kreuzcrawl::ExtractionMeta> for ExtractionMeta {
|
|
|
3294
3401
|
}
|
|
3295
3402
|
}
|
|
3296
3403
|
|
|
3404
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3297
3405
|
impl From<ProxyConfig> for kreuzcrawl::ProxyConfig {
|
|
3298
3406
|
fn from(val: ProxyConfig) -> Self {
|
|
3299
3407
|
Self {
|
|
@@ -3304,6 +3412,7 @@ impl From<ProxyConfig> for kreuzcrawl::ProxyConfig {
|
|
|
3304
3412
|
}
|
|
3305
3413
|
}
|
|
3306
3414
|
|
|
3415
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3307
3416
|
impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
|
|
3308
3417
|
fn from(val: kreuzcrawl::ProxyConfig) -> Self {
|
|
3309
3418
|
Self {
|
|
@@ -3314,6 +3423,47 @@ impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
|
|
|
3314
3423
|
}
|
|
3315
3424
|
}
|
|
3316
3425
|
|
|
3426
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3427
|
+
impl From<ContentConfig> for kreuzcrawl::ContentConfig {
|
|
3428
|
+
fn from(val: ContentConfig) -> Self {
|
|
3429
|
+
Self {
|
|
3430
|
+
output_format: val.output_format,
|
|
3431
|
+
preprocessing_preset: val.preprocessing_preset,
|
|
3432
|
+
remove_navigation: val.remove_navigation,
|
|
3433
|
+
remove_forms: val.remove_forms,
|
|
3434
|
+
strip_tags: val.strip_tags,
|
|
3435
|
+
preserve_tags: val.preserve_tags,
|
|
3436
|
+
exclude_selectors: val.exclude_selectors,
|
|
3437
|
+
skip_images: val.skip_images,
|
|
3438
|
+
max_depth: val.max_depth,
|
|
3439
|
+
wrap: val.wrap,
|
|
3440
|
+
wrap_width: val.wrap_width,
|
|
3441
|
+
include_document_structure: val.include_document_structure,
|
|
3442
|
+
}
|
|
3443
|
+
}
|
|
3444
|
+
}
|
|
3445
|
+
|
|
3446
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3447
|
+
impl From<kreuzcrawl::ContentConfig> for ContentConfig {
|
|
3448
|
+
fn from(val: kreuzcrawl::ContentConfig) -> Self {
|
|
3449
|
+
Self {
|
|
3450
|
+
output_format: val.output_format,
|
|
3451
|
+
preprocessing_preset: val.preprocessing_preset,
|
|
3452
|
+
remove_navigation: val.remove_navigation,
|
|
3453
|
+
remove_forms: val.remove_forms,
|
|
3454
|
+
strip_tags: val.strip_tags,
|
|
3455
|
+
preserve_tags: val.preserve_tags,
|
|
3456
|
+
exclude_selectors: val.exclude_selectors,
|
|
3457
|
+
skip_images: val.skip_images,
|
|
3458
|
+
max_depth: val.max_depth,
|
|
3459
|
+
wrap: val.wrap,
|
|
3460
|
+
wrap_width: val.wrap_width,
|
|
3461
|
+
include_document_structure: val.include_document_structure,
|
|
3462
|
+
}
|
|
3463
|
+
}
|
|
3464
|
+
}
|
|
3465
|
+
|
|
3466
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3317
3467
|
impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
|
|
3318
3468
|
fn from(val: BrowserConfig) -> Self {
|
|
3319
3469
|
Self {
|
|
@@ -3327,6 +3477,7 @@ impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
|
|
|
3327
3477
|
}
|
|
3328
3478
|
}
|
|
3329
3479
|
|
|
3480
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3330
3481
|
impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
|
|
3331
3482
|
fn from(val: kreuzcrawl::BrowserConfig) -> Self {
|
|
3332
3483
|
Self {
|
|
@@ -3341,6 +3492,7 @@ impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
|
|
|
3341
3492
|
}
|
|
3342
3493
|
|
|
3343
3494
|
#[allow(clippy::needless_update)]
|
|
3495
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3344
3496
|
impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
3345
3497
|
fn from(val: CrawlConfig) -> Self {
|
|
3346
3498
|
Self {
|
|
@@ -3362,8 +3514,8 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
3362
3514
|
cookies_enabled: val.cookies_enabled,
|
|
3363
3515
|
auth: val.auth.map(Into::into),
|
|
3364
3516
|
max_body_size: val.max_body_size,
|
|
3365
|
-
main_content_only: val.main_content_only,
|
|
3366
3517
|
remove_tags: val.remove_tags,
|
|
3518
|
+
content: val.content.into(),
|
|
3367
3519
|
map_limit: val.map_limit,
|
|
3368
3520
|
map_search: val.map_search,
|
|
3369
3521
|
download_assets: val.download_assets,
|
|
@@ -3384,6 +3536,7 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
3384
3536
|
}
|
|
3385
3537
|
}
|
|
3386
3538
|
|
|
3539
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3387
3540
|
impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
|
|
3388
3541
|
fn from(val: kreuzcrawl::CrawlConfig) -> Self {
|
|
3389
3542
|
Self {
|
|
@@ -3405,8 +3558,8 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
|
|
|
3405
3558
|
cookies_enabled: val.cookies_enabled,
|
|
3406
3559
|
auth: val.auth.map(Into::into),
|
|
3407
3560
|
max_body_size: val.max_body_size,
|
|
3408
|
-
main_content_only: val.main_content_only,
|
|
3409
3561
|
remove_tags: val.remove_tags,
|
|
3562
|
+
content: val.content.into(),
|
|
3410
3563
|
map_limit: val.map_limit,
|
|
3411
3564
|
map_search: val.map_search,
|
|
3412
3565
|
download_assets: val.download_assets,
|
|
@@ -3426,12 +3579,13 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
|
|
|
3426
3579
|
}
|
|
3427
3580
|
}
|
|
3428
3581
|
|
|
3582
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3429
3583
|
impl From<DownloadedDocument> for kreuzcrawl::DownloadedDocument {
|
|
3430
3584
|
fn from(val: DownloadedDocument) -> Self {
|
|
3431
3585
|
Self {
|
|
3432
3586
|
url: val.url,
|
|
3433
3587
|
mime_type: Default::default(),
|
|
3434
|
-
content: val.content,
|
|
3588
|
+
content: val.content.into(),
|
|
3435
3589
|
size: val.size,
|
|
3436
3590
|
filename: Default::default(),
|
|
3437
3591
|
content_hash: Default::default(),
|
|
@@ -3440,6 +3594,7 @@ impl From<DownloadedDocument> for kreuzcrawl::DownloadedDocument {
|
|
|
3440
3594
|
}
|
|
3441
3595
|
}
|
|
3442
3596
|
|
|
3597
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3443
3598
|
impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
|
|
3444
3599
|
fn from(val: kreuzcrawl::DownloadedDocument) -> Self {
|
|
3445
3600
|
Self {
|
|
@@ -3458,6 +3613,7 @@ impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
|
|
|
3458
3613
|
}
|
|
3459
3614
|
}
|
|
3460
3615
|
|
|
3616
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3461
3617
|
impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
3462
3618
|
fn from(val: ScrapeResult) -> Self {
|
|
3463
3619
|
Self {
|
|
@@ -3478,7 +3634,6 @@ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
|
3478
3634
|
is_pdf: val.is_pdf,
|
|
3479
3635
|
was_skipped: val.was_skipped,
|
|
3480
3636
|
detected_charset: val.detected_charset,
|
|
3481
|
-
main_content_only: val.main_content_only,
|
|
3482
3637
|
auth_header_sent: val.auth_header_sent,
|
|
3483
3638
|
response_meta: val.response_meta.map(Into::into),
|
|
3484
3639
|
assets: val.assets.into_iter().map(Into::into).collect(),
|
|
@@ -3487,12 +3642,13 @@ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
|
3487
3642
|
markdown: val.markdown.map(Into::into),
|
|
3488
3643
|
extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
|
|
3489
3644
|
extraction_meta: val.extraction_meta.map(Into::into),
|
|
3490
|
-
screenshot: val.screenshot,
|
|
3645
|
+
screenshot: val.screenshot.map(Into::into),
|
|
3491
3646
|
downloaded_document: val.downloaded_document.map(Into::into),
|
|
3492
3647
|
}
|
|
3493
3648
|
}
|
|
3494
3649
|
}
|
|
3495
3650
|
|
|
3651
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3496
3652
|
impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
|
|
3497
3653
|
fn from(val: kreuzcrawl::ScrapeResult) -> Self {
|
|
3498
3654
|
Self {
|
|
@@ -3513,7 +3669,6 @@ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
|
|
|
3513
3669
|
is_pdf: val.is_pdf,
|
|
3514
3670
|
was_skipped: val.was_skipped,
|
|
3515
3671
|
detected_charset: val.detected_charset,
|
|
3516
|
-
main_content_only: val.main_content_only,
|
|
3517
3672
|
auth_header_sent: val.auth_header_sent,
|
|
3518
3673
|
response_meta: val.response_meta.map(Into::into),
|
|
3519
3674
|
assets: val.assets.into_iter().map(Into::into).collect(),
|
|
@@ -3528,6 +3683,7 @@ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
|
|
|
3528
3683
|
}
|
|
3529
3684
|
}
|
|
3530
3685
|
|
|
3686
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3531
3687
|
impl From<CrawlPageResult> for kreuzcrawl::CrawlPageResult {
|
|
3532
3688
|
fn from(val: CrawlPageResult) -> Self {
|
|
3533
3689
|
Self {
|
|
@@ -3555,6 +3711,7 @@ impl From<CrawlPageResult> for kreuzcrawl::CrawlPageResult {
|
|
|
3555
3711
|
}
|
|
3556
3712
|
}
|
|
3557
3713
|
|
|
3714
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3558
3715
|
impl From<kreuzcrawl::CrawlPageResult> for CrawlPageResult {
|
|
3559
3716
|
fn from(val: kreuzcrawl::CrawlPageResult) -> Self {
|
|
3560
3717
|
Self {
|
|
@@ -3582,6 +3739,7 @@ impl From<kreuzcrawl::CrawlPageResult> for CrawlPageResult {
|
|
|
3582
3739
|
}
|
|
3583
3740
|
}
|
|
3584
3741
|
|
|
3742
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3585
3743
|
impl From<CrawlResult> for kreuzcrawl::CrawlResult {
|
|
3586
3744
|
fn from(val: CrawlResult) -> Self {
|
|
3587
3745
|
Self {
|
|
@@ -3596,6 +3754,7 @@ impl From<CrawlResult> for kreuzcrawl::CrawlResult {
|
|
|
3596
3754
|
}
|
|
3597
3755
|
}
|
|
3598
3756
|
|
|
3757
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3599
3758
|
impl From<kreuzcrawl::CrawlResult> for CrawlResult {
|
|
3600
3759
|
fn from(val: kreuzcrawl::CrawlResult) -> Self {
|
|
3601
3760
|
Self {
|
|
@@ -3610,6 +3769,7 @@ impl From<kreuzcrawl::CrawlResult> for CrawlResult {
|
|
|
3610
3769
|
}
|
|
3611
3770
|
}
|
|
3612
3771
|
|
|
3772
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3613
3773
|
impl From<SitemapUrl> for kreuzcrawl::SitemapUrl {
|
|
3614
3774
|
fn from(val: SitemapUrl) -> Self {
|
|
3615
3775
|
Self {
|
|
@@ -3621,6 +3781,7 @@ impl From<SitemapUrl> for kreuzcrawl::SitemapUrl {
|
|
|
3621
3781
|
}
|
|
3622
3782
|
}
|
|
3623
3783
|
|
|
3784
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3624
3785
|
impl From<kreuzcrawl::SitemapUrl> for SitemapUrl {
|
|
3625
3786
|
fn from(val: kreuzcrawl::SitemapUrl) -> Self {
|
|
3626
3787
|
Self {
|
|
@@ -3632,6 +3793,7 @@ impl From<kreuzcrawl::SitemapUrl> for SitemapUrl {
|
|
|
3632
3793
|
}
|
|
3633
3794
|
}
|
|
3634
3795
|
|
|
3796
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3635
3797
|
impl From<MapResult> for kreuzcrawl::MapResult {
|
|
3636
3798
|
fn from(val: MapResult) -> Self {
|
|
3637
3799
|
Self {
|
|
@@ -3640,6 +3802,7 @@ impl From<MapResult> for kreuzcrawl::MapResult {
|
|
|
3640
3802
|
}
|
|
3641
3803
|
}
|
|
3642
3804
|
|
|
3805
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3643
3806
|
impl From<kreuzcrawl::MapResult> for MapResult {
|
|
3644
3807
|
fn from(val: kreuzcrawl::MapResult) -> Self {
|
|
3645
3808
|
Self {
|
|
@@ -3648,6 +3811,7 @@ impl From<kreuzcrawl::MapResult> for MapResult {
|
|
|
3648
3811
|
}
|
|
3649
3812
|
}
|
|
3650
3813
|
|
|
3814
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3651
3815
|
impl From<MarkdownResult> for kreuzcrawl::MarkdownResult {
|
|
3652
3816
|
fn from(val: MarkdownResult) -> Self {
|
|
3653
3817
|
Self {
|
|
@@ -3668,6 +3832,7 @@ impl From<MarkdownResult> for kreuzcrawl::MarkdownResult {
|
|
|
3668
3832
|
}
|
|
3669
3833
|
}
|
|
3670
3834
|
|
|
3835
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3671
3836
|
impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
|
|
3672
3837
|
fn from(val: kreuzcrawl::MarkdownResult) -> Self {
|
|
3673
3838
|
Self {
|
|
@@ -3681,6 +3846,7 @@ impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
|
|
|
3681
3846
|
}
|
|
3682
3847
|
}
|
|
3683
3848
|
|
|
3849
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3684
3850
|
impl From<LinkInfo> for kreuzcrawl::LinkInfo {
|
|
3685
3851
|
fn from(val: LinkInfo) -> Self {
|
|
3686
3852
|
Self {
|
|
@@ -3693,6 +3859,7 @@ impl From<LinkInfo> for kreuzcrawl::LinkInfo {
|
|
|
3693
3859
|
}
|
|
3694
3860
|
}
|
|
3695
3861
|
|
|
3862
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3696
3863
|
impl From<kreuzcrawl::LinkInfo> for LinkInfo {
|
|
3697
3864
|
fn from(val: kreuzcrawl::LinkInfo) -> Self {
|
|
3698
3865
|
Self {
|
|
@@ -3705,6 +3872,7 @@ impl From<kreuzcrawl::LinkInfo> for LinkInfo {
|
|
|
3705
3872
|
}
|
|
3706
3873
|
}
|
|
3707
3874
|
|
|
3875
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3708
3876
|
impl From<ImageInfo> for kreuzcrawl::ImageInfo {
|
|
3709
3877
|
fn from(val: ImageInfo) -> Self {
|
|
3710
3878
|
Self {
|
|
@@ -3717,6 +3885,7 @@ impl From<ImageInfo> for kreuzcrawl::ImageInfo {
|
|
|
3717
3885
|
}
|
|
3718
3886
|
}
|
|
3719
3887
|
|
|
3888
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3720
3889
|
impl From<kreuzcrawl::ImageInfo> for ImageInfo {
|
|
3721
3890
|
fn from(val: kreuzcrawl::ImageInfo) -> Self {
|
|
3722
3891
|
Self {
|
|
@@ -3729,6 +3898,7 @@ impl From<kreuzcrawl::ImageInfo> for ImageInfo {
|
|
|
3729
3898
|
}
|
|
3730
3899
|
}
|
|
3731
3900
|
|
|
3901
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3732
3902
|
impl From<FeedInfo> for kreuzcrawl::FeedInfo {
|
|
3733
3903
|
fn from(val: FeedInfo) -> Self {
|
|
3734
3904
|
Self {
|
|
@@ -3739,6 +3909,7 @@ impl From<FeedInfo> for kreuzcrawl::FeedInfo {
|
|
|
3739
3909
|
}
|
|
3740
3910
|
}
|
|
3741
3911
|
|
|
3912
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3742
3913
|
impl From<kreuzcrawl::FeedInfo> for FeedInfo {
|
|
3743
3914
|
fn from(val: kreuzcrawl::FeedInfo) -> Self {
|
|
3744
3915
|
Self {
|
|
@@ -3749,6 +3920,7 @@ impl From<kreuzcrawl::FeedInfo> for FeedInfo {
|
|
|
3749
3920
|
}
|
|
3750
3921
|
}
|
|
3751
3922
|
|
|
3923
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3752
3924
|
impl From<JsonLdEntry> for kreuzcrawl::JsonLdEntry {
|
|
3753
3925
|
fn from(val: JsonLdEntry) -> Self {
|
|
3754
3926
|
Self {
|
|
@@ -3759,6 +3931,7 @@ impl From<JsonLdEntry> for kreuzcrawl::JsonLdEntry {
|
|
|
3759
3931
|
}
|
|
3760
3932
|
}
|
|
3761
3933
|
|
|
3934
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3762
3935
|
impl From<kreuzcrawl::JsonLdEntry> for JsonLdEntry {
|
|
3763
3936
|
fn from(val: kreuzcrawl::JsonLdEntry) -> Self {
|
|
3764
3937
|
Self {
|
|
@@ -3769,6 +3942,7 @@ impl From<kreuzcrawl::JsonLdEntry> for JsonLdEntry {
|
|
|
3769
3942
|
}
|
|
3770
3943
|
}
|
|
3771
3944
|
|
|
3945
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3772
3946
|
impl From<CookieInfo> for kreuzcrawl::CookieInfo {
|
|
3773
3947
|
fn from(val: CookieInfo) -> Self {
|
|
3774
3948
|
Self {
|
|
@@ -3780,6 +3954,7 @@ impl From<CookieInfo> for kreuzcrawl::CookieInfo {
|
|
|
3780
3954
|
}
|
|
3781
3955
|
}
|
|
3782
3956
|
|
|
3957
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3783
3958
|
impl From<kreuzcrawl::CookieInfo> for CookieInfo {
|
|
3784
3959
|
fn from(val: kreuzcrawl::CookieInfo) -> Self {
|
|
3785
3960
|
Self {
|
|
@@ -3791,6 +3966,7 @@ impl From<kreuzcrawl::CookieInfo> for CookieInfo {
|
|
|
3791
3966
|
}
|
|
3792
3967
|
}
|
|
3793
3968
|
|
|
3969
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3794
3970
|
impl From<DownloadedAsset> for kreuzcrawl::DownloadedAsset {
|
|
3795
3971
|
fn from(val: DownloadedAsset) -> Self {
|
|
3796
3972
|
Self {
|
|
@@ -3804,6 +3980,7 @@ impl From<DownloadedAsset> for kreuzcrawl::DownloadedAsset {
|
|
|
3804
3980
|
}
|
|
3805
3981
|
}
|
|
3806
3982
|
|
|
3983
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3807
3984
|
impl From<kreuzcrawl::DownloadedAsset> for DownloadedAsset {
|
|
3808
3985
|
fn from(val: kreuzcrawl::DownloadedAsset) -> Self {
|
|
3809
3986
|
Self {
|
|
@@ -3817,6 +3994,7 @@ impl From<kreuzcrawl::DownloadedAsset> for DownloadedAsset {
|
|
|
3817
3994
|
}
|
|
3818
3995
|
}
|
|
3819
3996
|
|
|
3997
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3820
3998
|
impl From<ArticleMetadata> for kreuzcrawl::ArticleMetadata {
|
|
3821
3999
|
fn from(val: ArticleMetadata) -> Self {
|
|
3822
4000
|
Self {
|
|
@@ -3829,6 +4007,7 @@ impl From<ArticleMetadata> for kreuzcrawl::ArticleMetadata {
|
|
|
3829
4007
|
}
|
|
3830
4008
|
}
|
|
3831
4009
|
|
|
4010
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3832
4011
|
impl From<kreuzcrawl::ArticleMetadata> for ArticleMetadata {
|
|
3833
4012
|
fn from(val: kreuzcrawl::ArticleMetadata) -> Self {
|
|
3834
4013
|
Self {
|
|
@@ -3841,6 +4020,7 @@ impl From<kreuzcrawl::ArticleMetadata> for ArticleMetadata {
|
|
|
3841
4020
|
}
|
|
3842
4021
|
}
|
|
3843
4022
|
|
|
4023
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3844
4024
|
impl From<HreflangEntry> for kreuzcrawl::HreflangEntry {
|
|
3845
4025
|
fn from(val: HreflangEntry) -> Self {
|
|
3846
4026
|
Self {
|
|
@@ -3850,6 +4030,7 @@ impl From<HreflangEntry> for kreuzcrawl::HreflangEntry {
|
|
|
3850
4030
|
}
|
|
3851
4031
|
}
|
|
3852
4032
|
|
|
4033
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3853
4034
|
impl From<kreuzcrawl::HreflangEntry> for HreflangEntry {
|
|
3854
4035
|
fn from(val: kreuzcrawl::HreflangEntry) -> Self {
|
|
3855
4036
|
Self {
|
|
@@ -3859,6 +4040,7 @@ impl From<kreuzcrawl::HreflangEntry> for HreflangEntry {
|
|
|
3859
4040
|
}
|
|
3860
4041
|
}
|
|
3861
4042
|
|
|
4043
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3862
4044
|
impl From<FaviconInfo> for kreuzcrawl::FaviconInfo {
|
|
3863
4045
|
fn from(val: FaviconInfo) -> Self {
|
|
3864
4046
|
Self {
|
|
@@ -3870,6 +4052,7 @@ impl From<FaviconInfo> for kreuzcrawl::FaviconInfo {
|
|
|
3870
4052
|
}
|
|
3871
4053
|
}
|
|
3872
4054
|
|
|
4055
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3873
4056
|
impl From<kreuzcrawl::FaviconInfo> for FaviconInfo {
|
|
3874
4057
|
fn from(val: kreuzcrawl::FaviconInfo) -> Self {
|
|
3875
4058
|
Self {
|
|
@@ -3881,6 +4064,7 @@ impl From<kreuzcrawl::FaviconInfo> for FaviconInfo {
|
|
|
3881
4064
|
}
|
|
3882
4065
|
}
|
|
3883
4066
|
|
|
4067
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3884
4068
|
impl From<HeadingInfo> for kreuzcrawl::HeadingInfo {
|
|
3885
4069
|
fn from(val: HeadingInfo) -> Self {
|
|
3886
4070
|
Self {
|
|
@@ -3890,6 +4074,7 @@ impl From<HeadingInfo> for kreuzcrawl::HeadingInfo {
|
|
|
3890
4074
|
}
|
|
3891
4075
|
}
|
|
3892
4076
|
|
|
4077
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3893
4078
|
impl From<kreuzcrawl::HeadingInfo> for HeadingInfo {
|
|
3894
4079
|
fn from(val: kreuzcrawl::HeadingInfo) -> Self {
|
|
3895
4080
|
Self {
|
|
@@ -3899,6 +4084,7 @@ impl From<kreuzcrawl::HeadingInfo> for HeadingInfo {
|
|
|
3899
4084
|
}
|
|
3900
4085
|
}
|
|
3901
4086
|
|
|
4087
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3902
4088
|
impl From<ResponseMeta> for kreuzcrawl::ResponseMeta {
|
|
3903
4089
|
fn from(val: ResponseMeta) -> Self {
|
|
3904
4090
|
Self {
|
|
@@ -3913,6 +4099,7 @@ impl From<ResponseMeta> for kreuzcrawl::ResponseMeta {
|
|
|
3913
4099
|
}
|
|
3914
4100
|
}
|
|
3915
4101
|
|
|
4102
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3916
4103
|
impl From<kreuzcrawl::ResponseMeta> for ResponseMeta {
|
|
3917
4104
|
fn from(val: kreuzcrawl::ResponseMeta) -> Self {
|
|
3918
4105
|
Self {
|
|
@@ -3927,6 +4114,7 @@ impl From<kreuzcrawl::ResponseMeta> for ResponseMeta {
|
|
|
3927
4114
|
}
|
|
3928
4115
|
}
|
|
3929
4116
|
|
|
4117
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3930
4118
|
impl From<PageMetadata> for kreuzcrawl::PageMetadata {
|
|
3931
4119
|
fn from(val: PageMetadata) -> Self {
|
|
3932
4120
|
Self {
|
|
@@ -3977,6 +4165,7 @@ impl From<PageMetadata> for kreuzcrawl::PageMetadata {
|
|
|
3977
4165
|
}
|
|
3978
4166
|
}
|
|
3979
4167
|
|
|
4168
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
3980
4169
|
impl From<kreuzcrawl::PageMetadata> for PageMetadata {
|
|
3981
4170
|
fn from(val: kreuzcrawl::PageMetadata) -> Self {
|
|
3982
4171
|
Self {
|
|
@@ -4027,6 +4216,7 @@ impl From<kreuzcrawl::PageMetadata> for PageMetadata {
|
|
|
4027
4216
|
}
|
|
4028
4217
|
}
|
|
4029
4218
|
|
|
4219
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4030
4220
|
impl From<CitationResult> for kreuzcrawl::CitationResult {
|
|
4031
4221
|
fn from(val: CitationResult) -> Self {
|
|
4032
4222
|
Self {
|
|
@@ -4036,6 +4226,7 @@ impl From<CitationResult> for kreuzcrawl::CitationResult {
|
|
|
4036
4226
|
}
|
|
4037
4227
|
}
|
|
4038
4228
|
|
|
4229
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4039
4230
|
impl From<kreuzcrawl::CitationResult> for CitationResult {
|
|
4040
4231
|
fn from(val: kreuzcrawl::CitationResult) -> Self {
|
|
4041
4232
|
Self {
|
|
@@ -4045,6 +4236,7 @@ impl From<kreuzcrawl::CitationResult> for CitationResult {
|
|
|
4045
4236
|
}
|
|
4046
4237
|
}
|
|
4047
4238
|
|
|
4239
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4048
4240
|
impl From<CitationReference> for kreuzcrawl::CitationReference {
|
|
4049
4241
|
fn from(val: CitationReference) -> Self {
|
|
4050
4242
|
Self {
|
|
@@ -4055,6 +4247,7 @@ impl From<CitationReference> for kreuzcrawl::CitationReference {
|
|
|
4055
4247
|
}
|
|
4056
4248
|
}
|
|
4057
4249
|
|
|
4250
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4058
4251
|
impl From<kreuzcrawl::CitationReference> for CitationReference {
|
|
4059
4252
|
fn from(val: kreuzcrawl::CitationReference) -> Self {
|
|
4060
4253
|
Self {
|
|
@@ -4065,6 +4258,7 @@ impl From<kreuzcrawl::CitationReference> for CitationReference {
|
|
|
4065
4258
|
}
|
|
4066
4259
|
}
|
|
4067
4260
|
|
|
4261
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4068
4262
|
impl From<BatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
|
|
4069
4263
|
fn from(val: BatchScrapeResult) -> Self {
|
|
4070
4264
|
Self {
|
|
@@ -4075,6 +4269,7 @@ impl From<BatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
|
|
|
4075
4269
|
}
|
|
4076
4270
|
}
|
|
4077
4271
|
|
|
4272
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4078
4273
|
impl From<kreuzcrawl::BatchScrapeResult> for BatchScrapeResult {
|
|
4079
4274
|
fn from(val: kreuzcrawl::BatchScrapeResult) -> Self {
|
|
4080
4275
|
Self {
|
|
@@ -4085,6 +4280,7 @@ impl From<kreuzcrawl::BatchScrapeResult> for BatchScrapeResult {
|
|
|
4085
4280
|
}
|
|
4086
4281
|
}
|
|
4087
4282
|
|
|
4283
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4088
4284
|
impl From<BatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
|
|
4089
4285
|
fn from(val: BatchCrawlResult) -> Self {
|
|
4090
4286
|
Self {
|
|
@@ -4095,6 +4291,7 @@ impl From<BatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
|
|
|
4095
4291
|
}
|
|
4096
4292
|
}
|
|
4097
4293
|
|
|
4294
|
+
#[allow(clippy::redundant_closure, clippy::useless_conversion)]
|
|
4098
4295
|
impl From<kreuzcrawl::BatchCrawlResult> for BatchCrawlResult {
|
|
4099
4296
|
fn from(val: kreuzcrawl::BatchCrawlResult) -> Self {
|
|
4100
4297
|
Self {
|
|
@@ -4288,6 +4485,24 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4288
4485
|
class.define_method("username", method!(ProxyConfig::username, 0))?;
|
|
4289
4486
|
class.define_method("password", method!(ProxyConfig::password, 0))?;
|
|
4290
4487
|
|
|
4488
|
+
let class = module.define_class("ContentConfig", ruby.class_object())?;
|
|
4489
|
+
class.define_singleton_method("new", function!(ContentConfig::new, 12))?;
|
|
4490
|
+
class.define_method("output_format", method!(ContentConfig::output_format, 0))?;
|
|
4491
|
+
class.define_method("preprocessing_preset", method!(ContentConfig::preprocessing_preset, 0))?;
|
|
4492
|
+
class.define_method("remove_navigation", method!(ContentConfig::remove_navigation, 0))?;
|
|
4493
|
+
class.define_method("remove_forms", method!(ContentConfig::remove_forms, 0))?;
|
|
4494
|
+
class.define_method("strip_tags", method!(ContentConfig::strip_tags, 0))?;
|
|
4495
|
+
class.define_method("preserve_tags", method!(ContentConfig::preserve_tags, 0))?;
|
|
4496
|
+
class.define_method("exclude_selectors", method!(ContentConfig::exclude_selectors, 0))?;
|
|
4497
|
+
class.define_method("skip_images", method!(ContentConfig::skip_images, 0))?;
|
|
4498
|
+
class.define_method("max_depth", method!(ContentConfig::max_depth, 0))?;
|
|
4499
|
+
class.define_method("wrap", method!(ContentConfig::wrap, 0))?;
|
|
4500
|
+
class.define_method("wrap_width", method!(ContentConfig::wrap_width, 0))?;
|
|
4501
|
+
class.define_method(
|
|
4502
|
+
"include_document_structure",
|
|
4503
|
+
method!(ContentConfig::include_document_structure, 0),
|
|
4504
|
+
)?;
|
|
4505
|
+
|
|
4291
4506
|
let class = module.define_class("BrowserConfig", ruby.class_object())?;
|
|
4292
4507
|
class.define_singleton_method("new", function!(BrowserConfig::new, 6))?;
|
|
4293
4508
|
class.define_method("mode", method!(BrowserConfig::mode, 0))?;
|
|
@@ -4317,8 +4532,8 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4317
4532
|
class.define_method("cookies_enabled", method!(CrawlConfig::cookies_enabled, 0))?;
|
|
4318
4533
|
class.define_method("auth", method!(CrawlConfig::auth, 0))?;
|
|
4319
4534
|
class.define_method("max_body_size", method!(CrawlConfig::max_body_size, 0))?;
|
|
4320
|
-
class.define_method("main_content_only", method!(CrawlConfig::main_content_only, 0))?;
|
|
4321
4535
|
class.define_method("remove_tags", method!(CrawlConfig::remove_tags, 0))?;
|
|
4536
|
+
class.define_method("content", method!(CrawlConfig::content, 0))?;
|
|
4322
4537
|
class.define_method("map_limit", method!(CrawlConfig::map_limit, 0))?;
|
|
4323
4538
|
class.define_method("map_search", method!(CrawlConfig::map_search, 0))?;
|
|
4324
4539
|
class.define_method("download_assets", method!(CrawlConfig::download_assets, 0))?;
|
|
@@ -4365,7 +4580,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4365
4580
|
class.define_method("is_pdf", method!(ScrapeResult::is_pdf, 0))?;
|
|
4366
4581
|
class.define_method("was_skipped", method!(ScrapeResult::was_skipped, 0))?;
|
|
4367
4582
|
class.define_method("detected_charset", method!(ScrapeResult::detected_charset, 0))?;
|
|
4368
|
-
class.define_method("main_content_only", method!(ScrapeResult::main_content_only, 0))?;
|
|
4369
4583
|
class.define_method("auth_header_sent", method!(ScrapeResult::auth_header_sent, 0))?;
|
|
4370
4584
|
class.define_method("response_meta", method!(ScrapeResult::response_meta, 0))?;
|
|
4371
4585
|
class.define_method("assets", method!(ScrapeResult::assets, 0))?;
|