kreuzcrawl 0.1.2 → 0.3.0.pre.rc.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a2f4027aef59737e93add85d5da2398c4a95767fb41766b86822f50199331e32
4
- data.tar.gz: 82f38cf86e988321ef9bc11f5a42a023df464e79d0d0ad646edeae564cabfec6
3
+ metadata.gz: 92258fdaf73e3d2a7c3d2973905388ea408deb41a035d5d5a35613924c9417da
4
+ data.tar.gz: a907f79b86c3511a7c572dfcf07fa342d19f1e2455c25f47a7ea75b2b543c171
5
5
  SHA512:
6
- metadata.gz: 1ebcebff06f45e809441c0cae1d9f14be48a79b6d1439b3421ebedd4808d9c71e6ffa86d51a6c1d64ba57a0bc2eeffa24a672a4af3db5659185ba5c8cca8443a
7
- data.tar.gz: af29b39fdf985044e579d71064af9eb10dc348c81a82e7f27dc0650c4720ee1164fd8fe5529358cc9ba494b0fc296e9a425b17182794d001b3bcee526f737d35
6
+ metadata.gz: 86aa4b0c8f043de802d7d844167b351b0e140a46b2f2d04664e0710104e3c305b5e156a709aeb0303a534cb7a53818180015c6b82bebc3ce41f84f48ae0feded
7
+ data.tar.gz: 2d3747ecd2e4a410e93f894e1ca16a48ad881735eaed33d0989d8abfd4278cb55970cafa5c925e8ce3b0ef5b2b9d15bdb79827fe8451b1281f6ba9501f9f326b
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzcrawl-rb"
3
- version = "0.1.2"
3
+ version = "0.3.0-rc.2"
4
4
  edition = "2024"
5
5
  license = "Elastic-2.0"
6
6
 
@@ -1,6 +1,10 @@
1
- # This file is auto-generated by alef. DO NOT EDIT.
1
+ # This file is auto-generated by alef DO NOT EDIT.
2
+ # alef:hash:b59e800fdddf59213911a2309f9e365fa6993399f1c12fbe68bd27b269cff2d9
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # Issues & docs: https://github.com/kreuzberg-dev/alef
2
6
  # frozen_string_literal: true
3
7
 
4
8
  module Kreuzcrawl
5
- VERSION = "0.1.2"
9
+ VERSION = "0.3.0-rc.2"
6
10
  end
@@ -1,4 +1,8 @@
1
- # This file is auto-generated by alef. DO NOT EDIT.
1
+ # This file is auto-generated by alef DO NOT EDIT.
2
+ # alef:hash:cb606b2a7daa26b35ced54ce0131bc649210d7c9396f1309bf155a45e52ef34e
3
+ # To regenerate: alef generate
4
+ # To verify freshness: alef verify --exit-code
5
+ # Issues & docs: https://github.com/kreuzberg-dev/alef
2
6
  # frozen_string_literal: true
3
7
 
4
8
  require_relative 'kreuzcrawl/version'
@@ -1,19 +1,7 @@
1
1
  // This file is auto-generated by alef. DO NOT EDIT.
2
+ // alef:hash:38edd0f53a45bf9e74fc6f8570ccc68607d810942abb51abaedc20c4afa2ccda
2
3
  // Re-generate with: alef generate
3
4
  #![allow(dead_code)]
4
- #![allow(
5
- clippy::too_many_arguments,
6
- clippy::let_unit_value,
7
- clippy::needless_borrow,
8
- clippy::map_identity,
9
- clippy::just_underscores_and_digits,
10
- clippy::unused_unit,
11
- clippy::unnecessary_cast,
12
- clippy::unwrap_or_default,
13
- clippy::derivable_impls,
14
- clippy::needless_borrows_for_generic_args,
15
- clippy::unnecessary_fallible_conversions
16
- )]
17
5
 
18
6
  use magnus::{Error, IntoValueFromNative, Ruby, function, method, prelude::*, try_convert::TryConvertOwned};
19
7
  use std::collections::HashMap;
@@ -175,6 +163,133 @@ impl ProxyConfig {
175
163
  }
176
164
  }
177
165
 
166
+ #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
167
+ #[magnus::wrap(class = "Kreuzcrawl::ContentConfig")]
168
+ #[serde(default)]
169
+ pub struct ContentConfig {
170
+ pub output_format: String,
171
+ pub preprocessing_preset: String,
172
+ pub remove_navigation: bool,
173
+ pub remove_forms: bool,
174
+ pub strip_tags: Vec<String>,
175
+ pub preserve_tags: Vec<String>,
176
+ pub exclude_selectors: Vec<String>,
177
+ pub skip_images: bool,
178
+ pub max_depth: Option<usize>,
179
+ pub wrap: bool,
180
+ pub wrap_width: usize,
181
+ pub include_document_structure: bool,
182
+ }
183
+
184
+ unsafe impl IntoValueFromNative for ContentConfig {}
185
+
186
+ impl magnus::TryConvert for ContentConfig {
187
+ fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
188
+ let r: &ContentConfig = magnus::TryConvert::try_convert(val)?;
189
+ Ok(r.clone())
190
+ }
191
+ }
192
+ unsafe impl TryConvertOwned for ContentConfig {}
193
+
194
+ impl Default for ContentConfig {
195
+ fn default() -> Self {
196
+ Self {
197
+ output_format: Default::default(),
198
+ preprocessing_preset: Default::default(),
199
+ remove_navigation: Default::default(),
200
+ remove_forms: Default::default(),
201
+ strip_tags: Default::default(),
202
+ preserve_tags: Default::default(),
203
+ exclude_selectors: Default::default(),
204
+ skip_images: Default::default(),
205
+ max_depth: Default::default(),
206
+ wrap: Default::default(),
207
+ wrap_width: Default::default(),
208
+ include_document_structure: Default::default(),
209
+ }
210
+ }
211
+ }
212
+
213
+ impl ContentConfig {
214
+ fn new(
215
+ output_format: Option<String>,
216
+ preprocessing_preset: Option<String>,
217
+ remove_navigation: Option<bool>,
218
+ remove_forms: Option<bool>,
219
+ strip_tags: Option<Vec<String>>,
220
+ preserve_tags: Option<Vec<String>>,
221
+ exclude_selectors: Option<Vec<String>>,
222
+ skip_images: Option<bool>,
223
+ max_depth: Option<usize>,
224
+ wrap: Option<bool>,
225
+ wrap_width: Option<usize>,
226
+ include_document_structure: Option<bool>,
227
+ ) -> Self {
228
+ Self {
229
+ output_format: output_format.unwrap_or("markdown".to_string()),
230
+ preprocessing_preset: preprocessing_preset.unwrap_or("standard".to_string()),
231
+ remove_navigation: remove_navigation.unwrap_or(true),
232
+ remove_forms: remove_forms.unwrap_or(true),
233
+ strip_tags: strip_tags.unwrap_or_default(),
234
+ preserve_tags: preserve_tags.unwrap_or_default(),
235
+ exclude_selectors: exclude_selectors.unwrap_or_default(),
236
+ skip_images: skip_images.unwrap_or(false),
237
+ max_depth,
238
+ wrap: wrap.unwrap_or(false),
239
+ wrap_width: wrap_width.unwrap_or(80),
240
+ include_document_structure: include_document_structure.unwrap_or(true),
241
+ }
242
+ }
243
+
244
+ fn output_format(&self) -> String {
245
+ self.output_format.clone()
246
+ }
247
+
248
+ fn preprocessing_preset(&self) -> String {
249
+ self.preprocessing_preset.clone()
250
+ }
251
+
252
+ fn remove_navigation(&self) -> bool {
253
+ self.remove_navigation
254
+ }
255
+
256
+ fn remove_forms(&self) -> bool {
257
+ self.remove_forms
258
+ }
259
+
260
+ fn strip_tags(&self) -> Vec<String> {
261
+ self.strip_tags.clone()
262
+ }
263
+
264
+ fn preserve_tags(&self) -> Vec<String> {
265
+ self.preserve_tags.clone()
266
+ }
267
+
268
+ fn exclude_selectors(&self) -> Vec<String> {
269
+ self.exclude_selectors.clone()
270
+ }
271
+
272
+ fn skip_images(&self) -> bool {
273
+ self.skip_images
274
+ }
275
+
276
+ fn max_depth(&self) -> Option<usize> {
277
+ self.max_depth
278
+ }
279
+
280
+ fn wrap(&self) -> bool {
281
+ self.wrap
282
+ }
283
+
284
+ fn wrap_width(&self) -> usize {
285
+ self.wrap_width
286
+ }
287
+
288
+ fn include_document_structure(&self) -> bool {
289
+ self.include_document_structure
290
+ }
291
+ }
292
+
178
293
  #[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
179
294
  #[magnus::wrap(class = "Kreuzcrawl::BrowserConfig")]
180
295
  #[serde(default)]
@@ -263,8 +378,8 @@ pub struct CrawlConfig {
263
378
  pub cookies_enabled: bool,
264
379
  pub auth: Option<AuthConfig>,
265
380
  pub max_body_size: Option<usize>,
266
- pub main_content_only: bool,
267
381
  pub remove_tags: Vec<String>,
382
+ pub content: ContentConfig,
268
383
  pub map_limit: Option<usize>,
269
384
  pub map_search: Option<String>,
270
385
  pub download_assets: bool,
@@ -361,14 +476,14 @@ impl CrawlConfig {
361
476
  max_body_size: kwargs
362
477
  .get(ruby.to_symbol("max_body_size"))
363
478
  .and_then(|v| usize::try_convert(v).ok()),
364
- main_content_only: kwargs
365
- .get(ruby.to_symbol("main_content_only"))
366
- .and_then(|v| bool::try_convert(v).ok())
367
- .unwrap_or(false),
368
479
  remove_tags: kwargs
369
480
  .get(ruby.to_symbol("remove_tags"))
370
481
  .and_then(|v| <Vec<String>>::try_convert(v).ok())
371
482
  .unwrap_or_default(),
483
+ content: kwargs
484
+ .get(ruby.to_symbol("content"))
485
+ .and_then(|v| ContentConfig::try_convert(v).ok())
486
+ .unwrap_or_default(),
372
487
  map_limit: kwargs
373
488
  .get(ruby.to_symbol("map_limit"))
374
489
  .and_then(|v| usize::try_convert(v).ok()),
@@ -497,14 +612,14 @@ impl CrawlConfig {
497
612
  self.max_body_size
498
613
  }
499
614
 
500
- fn main_content_only(&self) -> bool {
501
- self.main_content_only
502
- }
503
-
504
615
  fn remove_tags(&self) -> Vec<String> {
505
616
  self.remove_tags.clone()
506
617
  }
507
618
 
619
+ fn content(&self) -> ContentConfig {
620
+ self.content.clone()
621
+ }
622
+
508
623
  fn map_limit(&self) -> Option<usize> {
509
624
  self.map_limit
510
625
  }
@@ -586,8 +701,8 @@ impl CrawlConfig {
586
701
  cookies_enabled: self.cookies_enabled,
587
702
  auth: self.auth.clone().map(Into::into),
588
703
  max_body_size: self.max_body_size,
589
- main_content_only: self.main_content_only,
590
704
  remove_tags: self.remove_tags.clone(),
705
+ content: self.content.clone().into(),
591
706
  map_limit: self.map_limit,
592
707
  map_search: self.map_search.clone(),
593
708
  download_assets: self.download_assets,
@@ -723,7 +838,6 @@ pub struct ScrapeResult {
723
838
  pub is_pdf: bool,
724
839
  pub was_skipped: bool,
725
840
  pub detected_charset: Option<String>,
726
- pub main_content_only: bool,
727
841
  pub auth_header_sent: bool,
728
842
  pub response_meta: Option<ResponseMeta>,
729
843
  pub assets: Vec<DownloadedAsset>,
@@ -766,7 +880,6 @@ impl Default for ScrapeResult {
766
880
  is_pdf: Default::default(),
767
881
  was_skipped: Default::default(),
768
882
  detected_charset: Default::default(),
769
- main_content_only: Default::default(),
770
883
  auth_header_sent: Default::default(),
771
884
  response_meta: Default::default(),
772
885
  assets: Default::default(),
@@ -850,10 +963,6 @@ impl ScrapeResult {
850
963
  detected_charset: kwargs
851
964
  .get(ruby.to_symbol("detected_charset"))
852
965
  .and_then(|v| String::try_convert(v).ok()),
853
- main_content_only: kwargs
854
- .get(ruby.to_symbol("main_content_only"))
855
- .and_then(|v| bool::try_convert(v).ok())
856
- .unwrap_or_default(),
857
966
  auth_header_sent: kwargs
858
967
  .get(ruby.to_symbol("auth_header_sent"))
859
968
  .and_then(|v| bool::try_convert(v).ok())
@@ -959,10 +1068,6 @@ impl ScrapeResult {
959
1068
  self.detected_charset.clone()
960
1069
  }
961
1070
 
962
- fn main_content_only(&self) -> bool {
963
- self.main_content_only
964
- }
965
-
966
1071
  fn auth_header_sent(&self) -> bool {
967
1072
  self.auth_header_sent
968
1073
  }
@@ -3270,6 +3375,7 @@ fn batch_crawl_async(engine: CrawlEngineHandle, urls: Vec<String>) -> Result<Vec
3270
3375
  Ok(result.into_iter().map(Into::into).collect())
3271
3376
  }
3272
3377
 
3378
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3273
3379
  impl From<ExtractionMeta> for kreuzcrawl::ExtractionMeta {
3274
3380
  fn from(val: ExtractionMeta) -> Self {
3275
3381
  Self {
@@ -3282,6 +3388,7 @@ impl From<ExtractionMeta> for kreuzcrawl::ExtractionMeta {
3282
3388
  }
3283
3389
  }
3284
3390
 
3391
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3285
3392
  impl From<kreuzcrawl::ExtractionMeta> for ExtractionMeta {
3286
3393
  fn from(val: kreuzcrawl::ExtractionMeta) -> Self {
3287
3394
  Self {
@@ -3294,6 +3401,7 @@ impl From<kreuzcrawl::ExtractionMeta> for ExtractionMeta {
3294
3401
  }
3295
3402
  }
3296
3403
 
3404
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3297
3405
  impl From<ProxyConfig> for kreuzcrawl::ProxyConfig {
3298
3406
  fn from(val: ProxyConfig) -> Self {
3299
3407
  Self {
@@ -3304,6 +3412,7 @@ impl From<ProxyConfig> for kreuzcrawl::ProxyConfig {
3304
3412
  }
3305
3413
  }
3306
3414
 
3415
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3307
3416
  impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
3308
3417
  fn from(val: kreuzcrawl::ProxyConfig) -> Self {
3309
3418
  Self {
@@ -3314,6 +3423,47 @@ impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
3314
3423
  }
3315
3424
  }
3316
3425
 
3426
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3427
+ impl From<ContentConfig> for kreuzcrawl::ContentConfig {
3428
+ fn from(val: ContentConfig) -> Self {
3429
+ Self {
3430
+ output_format: val.output_format,
3431
+ preprocessing_preset: val.preprocessing_preset,
3432
+ remove_navigation: val.remove_navigation,
3433
+ remove_forms: val.remove_forms,
3434
+ strip_tags: val.strip_tags,
3435
+ preserve_tags: val.preserve_tags,
3436
+ exclude_selectors: val.exclude_selectors,
3437
+ skip_images: val.skip_images,
3438
+ max_depth: val.max_depth,
3439
+ wrap: val.wrap,
3440
+ wrap_width: val.wrap_width,
3441
+ include_document_structure: val.include_document_structure,
3442
+ }
3443
+ }
3444
+ }
3445
+
3446
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3447
+ impl From<kreuzcrawl::ContentConfig> for ContentConfig {
3448
+ fn from(val: kreuzcrawl::ContentConfig) -> Self {
3449
+ Self {
3450
+ output_format: val.output_format,
3451
+ preprocessing_preset: val.preprocessing_preset,
3452
+ remove_navigation: val.remove_navigation,
3453
+ remove_forms: val.remove_forms,
3454
+ strip_tags: val.strip_tags,
3455
+ preserve_tags: val.preserve_tags,
3456
+ exclude_selectors: val.exclude_selectors,
3457
+ skip_images: val.skip_images,
3458
+ max_depth: val.max_depth,
3459
+ wrap: val.wrap,
3460
+ wrap_width: val.wrap_width,
3461
+ include_document_structure: val.include_document_structure,
3462
+ }
3463
+ }
3464
+ }
3465
+
3466
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3317
3467
  impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
3318
3468
  fn from(val: BrowserConfig) -> Self {
3319
3469
  Self {
@@ -3327,6 +3477,7 @@ impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
3327
3477
  }
3328
3478
  }
3329
3479
 
3480
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3330
3481
  impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
3331
3482
  fn from(val: kreuzcrawl::BrowserConfig) -> Self {
3332
3483
  Self {
@@ -3341,6 +3492,7 @@ impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
3341
3492
  }
3342
3493
 
3343
3494
  #[allow(clippy::needless_update)]
3495
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3344
3496
  impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
3345
3497
  fn from(val: CrawlConfig) -> Self {
3346
3498
  Self {
@@ -3362,8 +3514,8 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
3362
3514
  cookies_enabled: val.cookies_enabled,
3363
3515
  auth: val.auth.map(Into::into),
3364
3516
  max_body_size: val.max_body_size,
3365
- main_content_only: val.main_content_only,
3366
3517
  remove_tags: val.remove_tags,
3518
+ content: val.content.into(),
3367
3519
  map_limit: val.map_limit,
3368
3520
  map_search: val.map_search,
3369
3521
  download_assets: val.download_assets,
@@ -3384,6 +3536,7 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
3384
3536
  }
3385
3537
  }
3386
3538
 
3539
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3387
3540
  impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
3388
3541
  fn from(val: kreuzcrawl::CrawlConfig) -> Self {
3389
3542
  Self {
@@ -3405,8 +3558,8 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
3405
3558
  cookies_enabled: val.cookies_enabled,
3406
3559
  auth: val.auth.map(Into::into),
3407
3560
  max_body_size: val.max_body_size,
3408
- main_content_only: val.main_content_only,
3409
3561
  remove_tags: val.remove_tags,
3562
+ content: val.content.into(),
3410
3563
  map_limit: val.map_limit,
3411
3564
  map_search: val.map_search,
3412
3565
  download_assets: val.download_assets,
@@ -3426,12 +3579,13 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
3426
3579
  }
3427
3580
  }
3428
3581
 
3582
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3429
3583
  impl From<DownloadedDocument> for kreuzcrawl::DownloadedDocument {
3430
3584
  fn from(val: DownloadedDocument) -> Self {
3431
3585
  Self {
3432
3586
  url: val.url,
3433
3587
  mime_type: Default::default(),
3434
- content: val.content,
3588
+ content: val.content.into(),
3435
3589
  size: val.size,
3436
3590
  filename: Default::default(),
3437
3591
  content_hash: Default::default(),
@@ -3440,6 +3594,7 @@ impl From<DownloadedDocument> for kreuzcrawl::DownloadedDocument {
3440
3594
  }
3441
3595
  }
3442
3596
 
3597
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3443
3598
  impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
3444
3599
  fn from(val: kreuzcrawl::DownloadedDocument) -> Self {
3445
3600
  Self {
@@ -3458,6 +3613,7 @@ impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
3458
3613
  }
3459
3614
  }
3460
3615
 
3616
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3461
3617
  impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
3462
3618
  fn from(val: ScrapeResult) -> Self {
3463
3619
  Self {
@@ -3478,7 +3634,6 @@ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
3478
3634
  is_pdf: val.is_pdf,
3479
3635
  was_skipped: val.was_skipped,
3480
3636
  detected_charset: val.detected_charset,
3481
- main_content_only: val.main_content_only,
3482
3637
  auth_header_sent: val.auth_header_sent,
3483
3638
  response_meta: val.response_meta.map(Into::into),
3484
3639
  assets: val.assets.into_iter().map(Into::into).collect(),
@@ -3487,12 +3642,13 @@ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
3487
3642
  markdown: val.markdown.map(Into::into),
3488
3643
  extracted_data: val.extracted_data.as_ref().and_then(|s| serde_json::from_str(s).ok()),
3489
3644
  extraction_meta: val.extraction_meta.map(Into::into),
3490
- screenshot: val.screenshot,
3645
+ screenshot: val.screenshot.map(Into::into),
3491
3646
  downloaded_document: val.downloaded_document.map(Into::into),
3492
3647
  }
3493
3648
  }
3494
3649
  }
3495
3650
 
3651
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3496
3652
  impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
3497
3653
  fn from(val: kreuzcrawl::ScrapeResult) -> Self {
3498
3654
  Self {
@@ -3513,7 +3669,6 @@ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
3513
3669
  is_pdf: val.is_pdf,
3514
3670
  was_skipped: val.was_skipped,
3515
3671
  detected_charset: val.detected_charset,
3516
- main_content_only: val.main_content_only,
3517
3672
  auth_header_sent: val.auth_header_sent,
3518
3673
  response_meta: val.response_meta.map(Into::into),
3519
3674
  assets: val.assets.into_iter().map(Into::into).collect(),
@@ -3528,6 +3683,7 @@ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
3528
3683
  }
3529
3684
  }
3530
3685
 
3686
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3531
3687
  impl From<CrawlPageResult> for kreuzcrawl::CrawlPageResult {
3532
3688
  fn from(val: CrawlPageResult) -> Self {
3533
3689
  Self {
@@ -3555,6 +3711,7 @@ impl From<CrawlPageResult> for kreuzcrawl::CrawlPageResult {
3555
3711
  }
3556
3712
  }
3557
3713
 
3714
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3558
3715
  impl From<kreuzcrawl::CrawlPageResult> for CrawlPageResult {
3559
3716
  fn from(val: kreuzcrawl::CrawlPageResult) -> Self {
3560
3717
  Self {
@@ -3582,6 +3739,7 @@ impl From<kreuzcrawl::CrawlPageResult> for CrawlPageResult {
3582
3739
  }
3583
3740
  }
3584
3741
 
3742
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3585
3743
  impl From<CrawlResult> for kreuzcrawl::CrawlResult {
3586
3744
  fn from(val: CrawlResult) -> Self {
3587
3745
  Self {
@@ -3596,6 +3754,7 @@ impl From<CrawlResult> for kreuzcrawl::CrawlResult {
3596
3754
  }
3597
3755
  }
3598
3756
 
3757
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3599
3758
  impl From<kreuzcrawl::CrawlResult> for CrawlResult {
3600
3759
  fn from(val: kreuzcrawl::CrawlResult) -> Self {
3601
3760
  Self {
@@ -3610,6 +3769,7 @@ impl From<kreuzcrawl::CrawlResult> for CrawlResult {
3610
3769
  }
3611
3770
  }
3612
3771
 
3772
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3613
3773
  impl From<SitemapUrl> for kreuzcrawl::SitemapUrl {
3614
3774
  fn from(val: SitemapUrl) -> Self {
3615
3775
  Self {
@@ -3621,6 +3781,7 @@ impl From<SitemapUrl> for kreuzcrawl::SitemapUrl {
3621
3781
  }
3622
3782
  }
3623
3783
 
3784
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3624
3785
  impl From<kreuzcrawl::SitemapUrl> for SitemapUrl {
3625
3786
  fn from(val: kreuzcrawl::SitemapUrl) -> Self {
3626
3787
  Self {
@@ -3632,6 +3793,7 @@ impl From<kreuzcrawl::SitemapUrl> for SitemapUrl {
3632
3793
  }
3633
3794
  }
3634
3795
 
3796
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3635
3797
  impl From<MapResult> for kreuzcrawl::MapResult {
3636
3798
  fn from(val: MapResult) -> Self {
3637
3799
  Self {
@@ -3640,6 +3802,7 @@ impl From<MapResult> for kreuzcrawl::MapResult {
3640
3802
  }
3641
3803
  }
3642
3804
 
3805
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3643
3806
  impl From<kreuzcrawl::MapResult> for MapResult {
3644
3807
  fn from(val: kreuzcrawl::MapResult) -> Self {
3645
3808
  Self {
@@ -3648,6 +3811,7 @@ impl From<kreuzcrawl::MapResult> for MapResult {
3648
3811
  }
3649
3812
  }
3650
3813
 
3814
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3651
3815
  impl From<MarkdownResult> for kreuzcrawl::MarkdownResult {
3652
3816
  fn from(val: MarkdownResult) -> Self {
3653
3817
  Self {
@@ -3668,6 +3832,7 @@ impl From<MarkdownResult> for kreuzcrawl::MarkdownResult {
3668
3832
  }
3669
3833
  }
3670
3834
 
3835
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3671
3836
  impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
3672
3837
  fn from(val: kreuzcrawl::MarkdownResult) -> Self {
3673
3838
  Self {
@@ -3681,6 +3846,7 @@ impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
3681
3846
  }
3682
3847
  }
3683
3848
 
3849
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3684
3850
  impl From<LinkInfo> for kreuzcrawl::LinkInfo {
3685
3851
  fn from(val: LinkInfo) -> Self {
3686
3852
  Self {
@@ -3693,6 +3859,7 @@ impl From<LinkInfo> for kreuzcrawl::LinkInfo {
3693
3859
  }
3694
3860
  }
3695
3861
 
3862
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3696
3863
  impl From<kreuzcrawl::LinkInfo> for LinkInfo {
3697
3864
  fn from(val: kreuzcrawl::LinkInfo) -> Self {
3698
3865
  Self {
@@ -3705,6 +3872,7 @@ impl From<kreuzcrawl::LinkInfo> for LinkInfo {
3705
3872
  }
3706
3873
  }
3707
3874
 
3875
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3708
3876
  impl From<ImageInfo> for kreuzcrawl::ImageInfo {
3709
3877
  fn from(val: ImageInfo) -> Self {
3710
3878
  Self {
@@ -3717,6 +3885,7 @@ impl From<ImageInfo> for kreuzcrawl::ImageInfo {
3717
3885
  }
3718
3886
  }
3719
3887
 
3888
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3720
3889
  impl From<kreuzcrawl::ImageInfo> for ImageInfo {
3721
3890
  fn from(val: kreuzcrawl::ImageInfo) -> Self {
3722
3891
  Self {
@@ -3729,6 +3898,7 @@ impl From<kreuzcrawl::ImageInfo> for ImageInfo {
3729
3898
  }
3730
3899
  }
3731
3900
 
3901
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3732
3902
  impl From<FeedInfo> for kreuzcrawl::FeedInfo {
3733
3903
  fn from(val: FeedInfo) -> Self {
3734
3904
  Self {
@@ -3739,6 +3909,7 @@ impl From<FeedInfo> for kreuzcrawl::FeedInfo {
3739
3909
  }
3740
3910
  }
3741
3911
 
3912
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3742
3913
  impl From<kreuzcrawl::FeedInfo> for FeedInfo {
3743
3914
  fn from(val: kreuzcrawl::FeedInfo) -> Self {
3744
3915
  Self {
@@ -3749,6 +3920,7 @@ impl From<kreuzcrawl::FeedInfo> for FeedInfo {
3749
3920
  }
3750
3921
  }
3751
3922
 
3923
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3752
3924
  impl From<JsonLdEntry> for kreuzcrawl::JsonLdEntry {
3753
3925
  fn from(val: JsonLdEntry) -> Self {
3754
3926
  Self {
@@ -3759,6 +3931,7 @@ impl From<JsonLdEntry> for kreuzcrawl::JsonLdEntry {
3759
3931
  }
3760
3932
  }
3761
3933
 
3934
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3762
3935
  impl From<kreuzcrawl::JsonLdEntry> for JsonLdEntry {
3763
3936
  fn from(val: kreuzcrawl::JsonLdEntry) -> Self {
3764
3937
  Self {
@@ -3769,6 +3942,7 @@ impl From<kreuzcrawl::JsonLdEntry> for JsonLdEntry {
3769
3942
  }
3770
3943
  }
3771
3944
 
3945
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3772
3946
  impl From<CookieInfo> for kreuzcrawl::CookieInfo {
3773
3947
  fn from(val: CookieInfo) -> Self {
3774
3948
  Self {
@@ -3780,6 +3954,7 @@ impl From<CookieInfo> for kreuzcrawl::CookieInfo {
3780
3954
  }
3781
3955
  }
3782
3956
 
3957
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3783
3958
  impl From<kreuzcrawl::CookieInfo> for CookieInfo {
3784
3959
  fn from(val: kreuzcrawl::CookieInfo) -> Self {
3785
3960
  Self {
@@ -3791,6 +3966,7 @@ impl From<kreuzcrawl::CookieInfo> for CookieInfo {
3791
3966
  }
3792
3967
  }
3793
3968
 
3969
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3794
3970
  impl From<DownloadedAsset> for kreuzcrawl::DownloadedAsset {
3795
3971
  fn from(val: DownloadedAsset) -> Self {
3796
3972
  Self {
@@ -3804,6 +3980,7 @@ impl From<DownloadedAsset> for kreuzcrawl::DownloadedAsset {
3804
3980
  }
3805
3981
  }
3806
3982
 
3983
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3807
3984
  impl From<kreuzcrawl::DownloadedAsset> for DownloadedAsset {
3808
3985
  fn from(val: kreuzcrawl::DownloadedAsset) -> Self {
3809
3986
  Self {
@@ -3817,6 +3994,7 @@ impl From<kreuzcrawl::DownloadedAsset> for DownloadedAsset {
3817
3994
  }
3818
3995
  }
3819
3996
 
3997
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3820
3998
  impl From<ArticleMetadata> for kreuzcrawl::ArticleMetadata {
3821
3999
  fn from(val: ArticleMetadata) -> Self {
3822
4000
  Self {
@@ -3829,6 +4007,7 @@ impl From<ArticleMetadata> for kreuzcrawl::ArticleMetadata {
3829
4007
  }
3830
4008
  }
3831
4009
 
4010
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3832
4011
  impl From<kreuzcrawl::ArticleMetadata> for ArticleMetadata {
3833
4012
  fn from(val: kreuzcrawl::ArticleMetadata) -> Self {
3834
4013
  Self {
@@ -3841,6 +4020,7 @@ impl From<kreuzcrawl::ArticleMetadata> for ArticleMetadata {
3841
4020
  }
3842
4021
  }
3843
4022
 
4023
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3844
4024
  impl From<HreflangEntry> for kreuzcrawl::HreflangEntry {
3845
4025
  fn from(val: HreflangEntry) -> Self {
3846
4026
  Self {
@@ -3850,6 +4030,7 @@ impl From<HreflangEntry> for kreuzcrawl::HreflangEntry {
3850
4030
  }
3851
4031
  }
3852
4032
 
4033
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3853
4034
  impl From<kreuzcrawl::HreflangEntry> for HreflangEntry {
3854
4035
  fn from(val: kreuzcrawl::HreflangEntry) -> Self {
3855
4036
  Self {
@@ -3859,6 +4040,7 @@ impl From<kreuzcrawl::HreflangEntry> for HreflangEntry {
3859
4040
  }
3860
4041
  }
3861
4042
 
4043
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3862
4044
  impl From<FaviconInfo> for kreuzcrawl::FaviconInfo {
3863
4045
  fn from(val: FaviconInfo) -> Self {
3864
4046
  Self {
@@ -3870,6 +4052,7 @@ impl From<FaviconInfo> for kreuzcrawl::FaviconInfo {
3870
4052
  }
3871
4053
  }
3872
4054
 
4055
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3873
4056
  impl From<kreuzcrawl::FaviconInfo> for FaviconInfo {
3874
4057
  fn from(val: kreuzcrawl::FaviconInfo) -> Self {
3875
4058
  Self {
@@ -3881,6 +4064,7 @@ impl From<kreuzcrawl::FaviconInfo> for FaviconInfo {
3881
4064
  }
3882
4065
  }
3883
4066
 
4067
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3884
4068
  impl From<HeadingInfo> for kreuzcrawl::HeadingInfo {
3885
4069
  fn from(val: HeadingInfo) -> Self {
3886
4070
  Self {
@@ -3890,6 +4074,7 @@ impl From<HeadingInfo> for kreuzcrawl::HeadingInfo {
3890
4074
  }
3891
4075
  }
3892
4076
 
4077
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3893
4078
  impl From<kreuzcrawl::HeadingInfo> for HeadingInfo {
3894
4079
  fn from(val: kreuzcrawl::HeadingInfo) -> Self {
3895
4080
  Self {
@@ -3899,6 +4084,7 @@ impl From<kreuzcrawl::HeadingInfo> for HeadingInfo {
3899
4084
  }
3900
4085
  }
3901
4086
 
4087
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3902
4088
  impl From<ResponseMeta> for kreuzcrawl::ResponseMeta {
3903
4089
  fn from(val: ResponseMeta) -> Self {
3904
4090
  Self {
@@ -3913,6 +4099,7 @@ impl From<ResponseMeta> for kreuzcrawl::ResponseMeta {
3913
4099
  }
3914
4100
  }
3915
4101
 
4102
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3916
4103
  impl From<kreuzcrawl::ResponseMeta> for ResponseMeta {
3917
4104
  fn from(val: kreuzcrawl::ResponseMeta) -> Self {
3918
4105
  Self {
@@ -3927,6 +4114,7 @@ impl From<kreuzcrawl::ResponseMeta> for ResponseMeta {
3927
4114
  }
3928
4115
  }
3929
4116
 
4117
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3930
4118
  impl From<PageMetadata> for kreuzcrawl::PageMetadata {
3931
4119
  fn from(val: PageMetadata) -> Self {
3932
4120
  Self {
@@ -3977,6 +4165,7 @@ impl From<PageMetadata> for kreuzcrawl::PageMetadata {
3977
4165
  }
3978
4166
  }
3979
4167
 
4168
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
3980
4169
  impl From<kreuzcrawl::PageMetadata> for PageMetadata {
3981
4170
  fn from(val: kreuzcrawl::PageMetadata) -> Self {
3982
4171
  Self {
@@ -4027,6 +4216,7 @@ impl From<kreuzcrawl::PageMetadata> for PageMetadata {
4027
4216
  }
4028
4217
  }
4029
4218
 
4219
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4030
4220
  impl From<CitationResult> for kreuzcrawl::CitationResult {
4031
4221
  fn from(val: CitationResult) -> Self {
4032
4222
  Self {
@@ -4036,6 +4226,7 @@ impl From<CitationResult> for kreuzcrawl::CitationResult {
4036
4226
  }
4037
4227
  }
4038
4228
 
4229
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4039
4230
  impl From<kreuzcrawl::CitationResult> for CitationResult {
4040
4231
  fn from(val: kreuzcrawl::CitationResult) -> Self {
4041
4232
  Self {
@@ -4045,6 +4236,7 @@ impl From<kreuzcrawl::CitationResult> for CitationResult {
4045
4236
  }
4046
4237
  }
4047
4238
 
4239
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4048
4240
  impl From<CitationReference> for kreuzcrawl::CitationReference {
4049
4241
  fn from(val: CitationReference) -> Self {
4050
4242
  Self {
@@ -4055,6 +4247,7 @@ impl From<CitationReference> for kreuzcrawl::CitationReference {
4055
4247
  }
4056
4248
  }
4057
4249
 
4250
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4058
4251
  impl From<kreuzcrawl::CitationReference> for CitationReference {
4059
4252
  fn from(val: kreuzcrawl::CitationReference) -> Self {
4060
4253
  Self {
@@ -4065,6 +4258,7 @@ impl From<kreuzcrawl::CitationReference> for CitationReference {
4065
4258
  }
4066
4259
  }
4067
4260
 
4261
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4068
4262
  impl From<BatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
4069
4263
  fn from(val: BatchScrapeResult) -> Self {
4070
4264
  Self {
@@ -4075,6 +4269,7 @@ impl From<BatchScrapeResult> for kreuzcrawl::BatchScrapeResult {
4075
4269
  }
4076
4270
  }
4077
4271
 
4272
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4078
4273
  impl From<kreuzcrawl::BatchScrapeResult> for BatchScrapeResult {
4079
4274
  fn from(val: kreuzcrawl::BatchScrapeResult) -> Self {
4080
4275
  Self {
@@ -4085,6 +4280,7 @@ impl From<kreuzcrawl::BatchScrapeResult> for BatchScrapeResult {
4085
4280
  }
4086
4281
  }
4087
4282
 
4283
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4088
4284
  impl From<BatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
4089
4285
  fn from(val: BatchCrawlResult) -> Self {
4090
4286
  Self {
@@ -4095,6 +4291,7 @@ impl From<BatchCrawlResult> for kreuzcrawl::BatchCrawlResult {
4095
4291
  }
4096
4292
  }
4097
4293
 
4294
+ #[allow(clippy::redundant_closure, clippy::useless_conversion)]
4098
4295
  impl From<kreuzcrawl::BatchCrawlResult> for BatchCrawlResult {
4099
4296
  fn from(val: kreuzcrawl::BatchCrawlResult) -> Self {
4100
4297
  Self {
@@ -4288,6 +4485,24 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
4288
4485
  class.define_method("username", method!(ProxyConfig::username, 0))?;
4289
4486
  class.define_method("password", method!(ProxyConfig::password, 0))?;
4290
4487
 
4488
+ let class = module.define_class("ContentConfig", ruby.class_object())?;
4489
+ class.define_singleton_method("new", function!(ContentConfig::new, 12))?;
4490
+ class.define_method("output_format", method!(ContentConfig::output_format, 0))?;
4491
+ class.define_method("preprocessing_preset", method!(ContentConfig::preprocessing_preset, 0))?;
4492
+ class.define_method("remove_navigation", method!(ContentConfig::remove_navigation, 0))?;
4493
+ class.define_method("remove_forms", method!(ContentConfig::remove_forms, 0))?;
4494
+ class.define_method("strip_tags", method!(ContentConfig::strip_tags, 0))?;
4495
+ class.define_method("preserve_tags", method!(ContentConfig::preserve_tags, 0))?;
4496
+ class.define_method("exclude_selectors", method!(ContentConfig::exclude_selectors, 0))?;
4497
+ class.define_method("skip_images", method!(ContentConfig::skip_images, 0))?;
4498
+ class.define_method("max_depth", method!(ContentConfig::max_depth, 0))?;
4499
+ class.define_method("wrap", method!(ContentConfig::wrap, 0))?;
4500
+ class.define_method("wrap_width", method!(ContentConfig::wrap_width, 0))?;
4501
+ class.define_method(
4502
+ "include_document_structure",
4503
+ method!(ContentConfig::include_document_structure, 0),
4504
+ )?;
4505
+
4291
4506
  let class = module.define_class("BrowserConfig", ruby.class_object())?;
4292
4507
  class.define_singleton_method("new", function!(BrowserConfig::new, 6))?;
4293
4508
  class.define_method("mode", method!(BrowserConfig::mode, 0))?;
@@ -4317,8 +4532,8 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
4317
4532
  class.define_method("cookies_enabled", method!(CrawlConfig::cookies_enabled, 0))?;
4318
4533
  class.define_method("auth", method!(CrawlConfig::auth, 0))?;
4319
4534
  class.define_method("max_body_size", method!(CrawlConfig::max_body_size, 0))?;
4320
- class.define_method("main_content_only", method!(CrawlConfig::main_content_only, 0))?;
4321
4535
  class.define_method("remove_tags", method!(CrawlConfig::remove_tags, 0))?;
4536
+ class.define_method("content", method!(CrawlConfig::content, 0))?;
4322
4537
  class.define_method("map_limit", method!(CrawlConfig::map_limit, 0))?;
4323
4538
  class.define_method("map_search", method!(CrawlConfig::map_search, 0))?;
4324
4539
  class.define_method("download_assets", method!(CrawlConfig::download_assets, 0))?;
@@ -4365,7 +4580,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
4365
4580
  class.define_method("is_pdf", method!(ScrapeResult::is_pdf, 0))?;
4366
4581
  class.define_method("was_skipped", method!(ScrapeResult::was_skipped, 0))?;
4367
4582
  class.define_method("detected_charset", method!(ScrapeResult::detected_charset, 0))?;
4368
- class.define_method("main_content_only", method!(ScrapeResult::main_content_only, 0))?;
4369
4583
  class.define_method("auth_header_sent", method!(ScrapeResult::auth_header_sent, 0))?;
4370
4584
  class.define_method("response_meta", method!(ScrapeResult::response_meta, 0))?;
4371
4585
  class.define_method("assets", method!(ScrapeResult::assets, 0))?;
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.3.0.pre.rc.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kreuzberg Team