kreuzcrawl 0.3.0.pre.rc.55 → 0.3.0.pre.rc.59

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: fa116f68f2098dce4abbe8ff92f44b9cff4b805c002d3cc8b8878c0124bb63c7
4
- data.tar.gz: 05e13d15d8bdb411f82e08645e56b25fe50f2aeb03e7ca47556daf8a838f6079
3
+ metadata.gz: 119450f2df01a74e99d218b681740eb7367fccbfaea5754437df4f266b765a2e
4
+ data.tar.gz: ec3bd96a79e88e273c3c75a19e1e1630a46b7dde9f205455e7b64cf642a5e3ad
5
5
  SHA512:
6
- metadata.gz: f759525a4253d622f2f52851aae6349e56d8e9c0edd6573aafa3b89551b582f57983a7fbe153ad61689584372aeeb396fd7a982ff7d84435021002ee6bac1e3f
7
- data.tar.gz: '014803027bbf92c00ec72c96df1a587244752ebc6579a5508781baca9441ce521918547fe8a956fed933f072fbf4b4e245d20d3d4e6f144bc9513c7612748dcc'
6
+ metadata.gz: 473048d12a1cde342c72722cdd730efa94d95ab48583261f869cb68ab913805d44f821424d8ac3ede99f132400f6d79ba79d72ed9002faa454b1d17b3692d712
7
+ data.tar.gz: '0109b1c7a3f3430d1396127924b17937de44c7d9a699e3c6bd61acc2a27d4169510fe7fd027f6726eff3016a976c5d64dcd61b065d70915efae18cc55dd9f5e1'
@@ -280,9 +280,9 @@ checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
280
280
 
281
281
  [[package]]
282
282
  name = "cc"
283
- version = "1.2.63"
283
+ version = "1.2.64"
284
284
  source = "registry+https://github.com/rust-lang/crates.io-index"
285
- checksum = "556e016178bb5662a08681bbe0f00f8e17631781a4dfc8c45e466e4b185ec27f"
285
+ checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f"
286
286
  dependencies = [
287
287
  "find-msvc-tools",
288
288
  "jobserver",
@@ -1013,19 +1013,21 @@ dependencies = [
1013
1013
 
1014
1014
  [[package]]
1015
1015
  name = "html-to-markdown-rs"
1016
- version = "3.5.7"
1016
+ version = "3.6.1"
1017
1017
  source = "registry+https://github.com/rust-lang/crates.io-index"
1018
- checksum = "6cbbfb183e8634cb956309c6bbd781d9ddae068d376fb9eb1451ac49cf4cbba7"
1018
+ checksum = "567f72be80982d7d1c9d46730506847e8782e62ff95bf8d3ede3489b3a8e963a"
1019
1019
  dependencies = [
1020
1020
  "ahash",
1021
1021
  "astral-tl",
1022
1022
  "base64",
1023
+ "bitflags",
1023
1024
  "html-escape",
1024
1025
  "html5ever",
1025
1026
  "image",
1026
1027
  "lru",
1027
1028
  "memchr",
1028
1029
  "once_cell",
1030
+ "phf",
1029
1031
  "regex",
1030
1032
  "serde",
1031
1033
  "serde_json",
@@ -1426,9 +1428,9 @@ dependencies = [
1426
1428
 
1427
1429
  [[package]]
1428
1430
  name = "js-sys"
1429
- version = "0.3.100"
1431
+ version = "0.3.102"
1430
1432
  source = "registry+https://github.com/rust-lang/crates.io-index"
1431
- checksum = "f2025f20d7a4fa7785846e7b63d10a76d3f1cee98ee5cb79ea59703f95e42162"
1433
+ checksum = "03d04c30968dffe80775bd4d7fb676131cd04a1fb46d2686dbffbaec2d9dfd31"
1432
1434
  dependencies = [
1433
1435
  "cfg-if",
1434
1436
  "futures-util",
@@ -1457,9 +1459,9 @@ dependencies = [
1457
1459
 
1458
1460
  [[package]]
1459
1461
  name = "kreuzcrawl"
1460
- version = "0.3.0-rc.55"
1462
+ version = "0.3.0-rc.59"
1461
1463
  source = "registry+https://github.com/rust-lang/crates.io-index"
1462
- checksum = "0d82133a5044ee918cd9f2c3523bf2cae2f9efedb4503039bfbb4f583f509969"
1464
+ checksum = "b7e01bcf1ea739b309c26f952a9ee4981d9080e9915863dc92ba3803df265c0e"
1463
1465
  dependencies = [
1464
1466
  "ahash",
1465
1467
  "aho-corasick",
@@ -1480,6 +1482,7 @@ dependencies = [
1480
1482
  "memchr",
1481
1483
  "notify",
1482
1484
  "opentelemetry",
1485
+ "opentelemetry-semantic-conventions",
1483
1486
  "quick-xml",
1484
1487
  "regex",
1485
1488
  "reqwest",
@@ -1491,12 +1494,13 @@ dependencies = [
1491
1494
  "tokio-stream",
1492
1495
  "toml",
1493
1496
  "tower",
1497
+ "tracing",
1494
1498
  "url",
1495
1499
  ]
1496
1500
 
1497
1501
  [[package]]
1498
1502
  name = "kreuzcrawl-rb"
1499
- version = "0.3.0-rc.55"
1503
+ version = "0.3.0-rc.59"
1500
1504
  dependencies = [
1501
1505
  "futures",
1502
1506
  "kreuzcrawl",
@@ -1754,6 +1758,12 @@ dependencies = [
1754
1758
  "tracing",
1755
1759
  ]
1756
1760
 
1761
+ [[package]]
1762
+ name = "opentelemetry-semantic-conventions"
1763
+ version = "0.32.0"
1764
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1765
+ checksum = "6ca2f98a0437b427b4b08f19f1caa3c44db885a202bc12cfea13d6c702243d68"
1766
+
1757
1767
  [[package]]
1758
1768
  name = "option-ext"
1759
1769
  version = "0.2.0"
@@ -1795,6 +1805,7 @@ version = "0.13.1"
1795
1805
  source = "registry+https://github.com/rust-lang/crates.io-index"
1796
1806
  checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
1797
1807
  dependencies = [
1808
+ "phf_macros",
1798
1809
  "phf_shared",
1799
1810
  "serde",
1800
1811
  ]
@@ -1819,6 +1830,19 @@ dependencies = [
1819
1830
  "phf_shared",
1820
1831
  ]
1821
1832
 
1833
+ [[package]]
1834
+ name = "phf_macros"
1835
+ version = "0.13.1"
1836
+ source = "registry+https://github.com/rust-lang/crates.io-index"
1837
+ checksum = "812f032b54b1e759ccd5f8b6677695d5268c588701effba24601f6932f8269ef"
1838
+ dependencies = [
1839
+ "phf_generator",
1840
+ "phf_shared",
1841
+ "proc-macro2",
1842
+ "quote",
1843
+ "syn",
1844
+ ]
1845
+
1822
1846
  [[package]]
1823
1847
  name = "phf_shared"
1824
1848
  version = "0.13.1"
@@ -2955,9 +2979,9 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
2955
2979
 
2956
2980
  [[package]]
2957
2981
  name = "wasip2"
2958
- version = "1.0.3+wasi-0.2.9"
2982
+ version = "1.0.4+wasi-0.2.12"
2959
2983
  source = "registry+https://github.com/rust-lang/crates.io-index"
2960
- checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6"
2984
+ checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487"
2961
2985
  dependencies = [
2962
2986
  "wit-bindgen 0.57.1",
2963
2987
  ]
@@ -2973,9 +2997,9 @@ dependencies = [
2973
2997
 
2974
2998
  [[package]]
2975
2999
  name = "wasm-bindgen"
2976
- version = "0.2.123"
3000
+ version = "0.2.125"
2977
3001
  source = "registry+https://github.com/rust-lang/crates.io-index"
2978
- checksum = "a254a4b10c19a76f09a27640e7ffbf9bc30bf67e16a3bf28aaefa4920fe81563"
3002
+ checksum = "8ddb3f79143bced6de84270411622a2699cee572fc0875aeaf1e7867cf9fca1a"
2979
3003
  dependencies = [
2980
3004
  "cfg-if",
2981
3005
  "once_cell",
@@ -2986,9 +3010,9 @@ dependencies = [
2986
3010
 
2987
3011
  [[package]]
2988
3012
  name = "wasm-bindgen-futures"
2989
- version = "0.4.73"
3013
+ version = "0.4.75"
2990
3014
  source = "registry+https://github.com/rust-lang/crates.io-index"
2991
- checksum = "54568702fabf5d4849ce2b90fadfa64168a097eaf4b351ce9df8b687a0086aaf"
3015
+ checksum = "503b14d284f2c8dac03b819967e155ea753f573586193b2b2c95990cb5d69280"
2992
3016
  dependencies = [
2993
3017
  "js-sys",
2994
3018
  "wasm-bindgen",
@@ -2996,9 +3020,9 @@ dependencies = [
2996
3020
 
2997
3021
  [[package]]
2998
3022
  name = "wasm-bindgen-macro"
2999
- version = "0.2.123"
3023
+ version = "0.2.125"
3000
3024
  source = "registry+https://github.com/rust-lang/crates.io-index"
3001
- checksum = "24a40fc75b0ec6f3746ceb10d36f53a93dcd68a93b11b6445983945d79eba0dc"
3025
+ checksum = "4e21a184b13fb19e157296e2c46056aec9092264fab83e4ba59e68c61b323c3d"
3002
3026
  dependencies = [
3003
3027
  "quote",
3004
3028
  "wasm-bindgen-macro-support",
@@ -3006,9 +3030,9 @@ dependencies = [
3006
3030
 
3007
3031
  [[package]]
3008
3032
  name = "wasm-bindgen-macro-support"
3009
- version = "0.2.123"
3033
+ version = "0.2.125"
3010
3034
  source = "registry+https://github.com/rust-lang/crates.io-index"
3011
- checksum = "908f34bd9b9ce3d4caf07b72dfab63d61504d156856c6bd3cd87fa350cf3985b"
3035
+ checksum = "fecefd9c35bd935a20fc3fc344b5f29138961e4f47fb03297d88f2587afb5ebd"
3012
3036
  dependencies = [
3013
3037
  "bumpalo",
3014
3038
  "proc-macro2",
@@ -3019,9 +3043,9 @@ dependencies = [
3019
3043
 
3020
3044
  [[package]]
3021
3045
  name = "wasm-bindgen-shared"
3022
- version = "0.2.123"
3046
+ version = "0.2.125"
3023
3047
  source = "registry+https://github.com/rust-lang/crates.io-index"
3024
- checksum = "7acbf7616c27b194bbb550bf77ed0c2c3e5b7fd1260a93082b95fb7f47959b92"
3048
+ checksum = "23939e44bb9a5d7576fa2b563dc2e136628f1224e88a8deed09e04858b77871f"
3025
3049
  dependencies = [
3026
3050
  "unicode-ident",
3027
3051
  ]
@@ -3062,9 +3086,9 @@ dependencies = [
3062
3086
 
3063
3087
  [[package]]
3064
3088
  name = "web-sys"
3065
- version = "0.3.100"
3089
+ version = "0.3.102"
3066
3090
  source = "registry+https://github.com/rust-lang/crates.io-index"
3067
- checksum = "6e0871acf327f283dc6da28a1696cdc64fb355ba9f935d052021fa77f35cce69"
3091
+ checksum = "a6430a72df5eb332242960fe84b3002a241163998241eb596d4f739b9757061d"
3068
3092
  dependencies = [
3069
3093
  "js-sys",
3070
3094
  "wasm-bindgen",
@@ -3523,9 +3547,9 @@ dependencies = [
3523
3547
 
3524
3548
  [[package]]
3525
3549
  name = "zeroize"
3526
- version = "1.8.2"
3550
+ version = "1.9.0"
3527
3551
  source = "registry+https://github.com/rust-lang/crates.io-index"
3528
- checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
3552
+ checksum = "e13c156562582aa81c60cb29407084cdb54c4164760106ab78e6c5b0858cf64e"
3529
3553
 
3530
3554
  [[package]]
3531
3555
  name = "zerotrie"
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "kreuzcrawl-rb"
3
- version = "0.3.0-rc.55"
3
+ version = "0.3.0-rc.59"
4
4
  edition = "2024"
5
5
  license = "Elastic-2.0"
6
6
  description = "High-performance web crawling engine"
@@ -18,7 +18,7 @@ crate-type = ["cdylib"]
18
18
 
19
19
  [dependencies]
20
20
  futures = "0.3"
21
- kreuzcrawl = { version = "0.3.0-rc.55", features = ["interact", "browser-chromiumoxide"] }
21
+ kreuzcrawl = { version = "0.3.0-rc.59", features = ["interact", "browser-chromiumoxide"] }
22
22
  magnus = "0.8"
23
23
  rb-sys = ">=0.9, <0.9.128"
24
24
  serde = { version = "1", features = ["derive"] }
@@ -1,5 +1,5 @@
1
1
  // This file is auto-generated by alef. DO NOT EDIT.
2
- // alef:hash:882a6f513499cb08d9cbeb641424fe238f26f3653e161241824c4ea057a44c4f
2
+ // alef:hash:4a1130eaa7f8e3312e90b66f3eb0a52e657e6378ee013ed7eeaada8817434913
3
3
  // Re-generate with: alef generate
4
4
  #![allow(dead_code, unused_imports, unused_variables)]
5
5
  #![allow(
@@ -400,7 +400,6 @@ pub struct BrowserConfig {
400
400
  wait: BrowserWait,
401
401
  wait_selector: Option<String>,
402
402
  extra_wait: Option<u64>,
403
- stealth: bool,
404
403
  proxy: Option<ProxyConfig>,
405
404
  block_url_patterns: Vec<String>,
406
405
  eval_script: Option<String>,
@@ -447,7 +446,6 @@ impl Default for BrowserConfig {
447
446
  wait: Default::default(),
448
447
  wait_selector: None,
449
448
  extra_wait: None,
450
- stealth: false,
451
449
  proxy: None,
452
450
  block_url_patterns: vec![],
453
451
  eval_script: None,
@@ -490,10 +488,6 @@ impl BrowserConfig {
490
488
  extra_wait: kwargs
491
489
  .get(ruby.to_symbol("extra_wait"))
492
490
  .and_then(|v| u64::try_convert(v).ok()),
493
- stealth: kwargs
494
- .get(ruby.to_symbol("stealth"))
495
- .and_then(|v| bool::try_convert(v).ok())
496
- .unwrap_or(false),
497
491
  proxy: kwargs
498
492
  .get(ruby.to_symbol("proxy"))
499
493
  .and_then(|v| ProxyConfig::try_convert(v).ok()),
@@ -546,10 +540,6 @@ impl BrowserConfig {
546
540
  self.extra_wait.clone()
547
541
  }
548
542
 
549
- fn stealth(&self) -> bool {
550
- self.stealth
551
- }
552
-
553
543
  fn proxy(&self) -> Option<ProxyConfig> {
554
544
  self.proxy.clone()
555
545
  }
@@ -610,6 +600,8 @@ pub struct CrawlConfig {
610
600
  proxy: Option<ProxyConfig>,
611
601
  user_agents: Vec<String>,
612
602
  capture_screenshot: bool,
603
+ follow_document_urls: bool,
604
+ document_url_depth: Option<u32>,
613
605
  download_documents: bool,
614
606
  document_max_size: Option<usize>,
615
607
  document_mime_types: Vec<String>,
@@ -679,6 +671,8 @@ impl Default for CrawlConfig {
679
671
  proxy: None,
680
672
  user_agents: vec![],
681
673
  capture_screenshot: false,
674
+ follow_document_urls: false,
675
+ document_url_depth: None,
682
676
  download_documents: true,
683
677
  document_max_size: None,
684
678
  document_mime_types: vec![],
@@ -805,6 +799,13 @@ impl CrawlConfig {
805
799
  .get(ruby.to_symbol("capture_screenshot"))
806
800
  .and_then(|v| bool::try_convert(v).ok())
807
801
  .unwrap_or(false),
802
+ follow_document_urls: kwargs
803
+ .get(ruby.to_symbol("follow_document_urls"))
804
+ .and_then(|v| bool::try_convert(v).ok())
805
+ .unwrap_or(false),
806
+ document_url_depth: kwargs
807
+ .get(ruby.to_symbol("document_url_depth"))
808
+ .and_then(|v| u32::try_convert(v).ok()),
808
809
  download_documents: kwargs
809
810
  .get(ruby.to_symbol("download_documents"))
810
811
  .and_then(|v| bool::try_convert(v).ok())
@@ -949,6 +950,14 @@ impl CrawlConfig {
949
950
  self.capture_screenshot
950
951
  }
951
952
 
953
+ fn follow_document_urls(&self) -> bool {
954
+ self.follow_document_urls
955
+ }
956
+
957
+ fn document_url_depth(&self) -> Option<u32> {
958
+ self.document_url_depth
959
+ }
960
+
952
961
  fn download_documents(&self) -> bool {
953
962
  self.download_documents
954
963
  }
@@ -1041,6 +1050,10 @@ impl CrawlConfig {
1041
1050
 
1042
1051
  capture_screenshot: self.capture_screenshot,
1043
1052
 
1053
+ follow_document_urls: self.follow_document_urls,
1054
+
1055
+ document_url_depth: self.document_url_depth,
1056
+
1044
1057
  download_documents: self.download_documents,
1045
1058
 
1046
1059
  document_max_size: self.document_max_size,
@@ -4411,6 +4424,7 @@ pub enum BrowserMode {
4411
4424
  Auto,
4412
4425
  Always,
4413
4426
  Never,
4427
+ Stealth,
4414
4428
  }
4415
4429
 
4416
4430
  impl Default for BrowserMode {
@@ -4425,6 +4439,7 @@ impl magnus::IntoValue for BrowserMode {
4425
4439
  BrowserMode::Auto => "auto",
4426
4440
  BrowserMode::Always => "always",
4427
4441
  BrowserMode::Never => "never",
4442
+ BrowserMode::Stealth => "stealth",
4428
4443
  };
4429
4444
  handle.to_symbol(sym).into_value_with(handle)
4430
4445
  }
@@ -4439,6 +4454,7 @@ impl magnus::TryConvert for BrowserMode {
4439
4454
  "auto" | "Auto" => Ok(BrowserMode::Auto),
4440
4455
  "always" | "Always" => Ok(BrowserMode::Always),
4441
4456
  "never" | "Never" => Ok(BrowserMode::Never),
4457
+ "stealth" | "Stealth" => Ok(BrowserMode::Stealth),
4442
4458
  other => Err(magnus::Error::new(
4443
4459
  unsafe { Ruby::get_unchecked() }.exception_arg_error(),
4444
4460
  format!("invalid BrowserMode value: {other}"),
@@ -5351,7 +5367,6 @@ impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
5351
5367
  wait: val.wait.into(),
5352
5368
  wait_selector: val.wait_selector,
5353
5369
  extra_wait: val.extra_wait.map(std::time::Duration::from_millis),
5354
- stealth: val.stealth,
5355
5370
  proxy: val.proxy.map(Into::into),
5356
5371
  block_url_patterns: val.block_url_patterns.into_iter().collect(),
5357
5372
  eval_script: val.eval_script,
@@ -5373,7 +5388,6 @@ impl From<kreuzcrawl::BrowserConfig> for BrowserConfig {
5373
5388
  wait: val.wait.into(),
5374
5389
  wait_selector: val.wait_selector.map(|v| v.to_string()),
5375
5390
  extra_wait: val.extra_wait.map(|d| d.as_millis() as u64),
5376
- stealth: val.stealth,
5377
5391
  proxy: val.proxy.map(Into::into),
5378
5392
  block_url_patterns: val.block_url_patterns.into_iter().collect(),
5379
5393
  eval_script: val.eval_script.map(|v| v.to_string()),
@@ -5423,6 +5437,8 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
5423
5437
  proxy: val.proxy.map(Into::into),
5424
5438
  user_agents: val.user_agents.into_iter().collect(),
5425
5439
  capture_screenshot: val.capture_screenshot,
5440
+ follow_document_urls: val.follow_document_urls,
5441
+ document_url_depth: val.document_url_depth,
5426
5442
  download_documents: val.download_documents,
5427
5443
  document_max_size: val.document_max_size,
5428
5444
  document_mime_types: val.document_mime_types.into_iter().collect(),
@@ -5473,6 +5489,8 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
5473
5489
  proxy: val.proxy.map(Into::into),
5474
5490
  user_agents: val.user_agents.into_iter().collect(),
5475
5491
  capture_screenshot: val.capture_screenshot,
5492
+ follow_document_urls: val.follow_document_urls,
5493
+ document_url_depth: val.document_url_depth,
5476
5494
  download_documents: val.download_documents,
5477
5495
  document_max_size: val.document_max_size,
5478
5496
  document_mime_types: val.document_mime_types.into_iter().collect(),
@@ -6368,6 +6386,7 @@ impl From<BrowserMode> for kreuzcrawl::BrowserMode {
6368
6386
  BrowserMode::Auto => Self::Auto,
6369
6387
  BrowserMode::Always => Self::Always,
6370
6388
  BrowserMode::Never => Self::Never,
6389
+ BrowserMode::Stealth => Self::Stealth,
6371
6390
  }
6372
6391
  }
6373
6392
  }
@@ -6378,6 +6397,7 @@ impl From<kreuzcrawl::BrowserMode> for BrowserMode {
6378
6397
  kreuzcrawl::BrowserMode::Auto => Self::Auto,
6379
6398
  kreuzcrawl::BrowserMode::Always => Self::Always,
6380
6399
  kreuzcrawl::BrowserMode::Never => Self::Never,
6400
+ kreuzcrawl::BrowserMode::Stealth => Self::Stealth,
6381
6401
  }
6382
6402
  }
6383
6403
  }
@@ -6892,8 +6912,6 @@ fn ruby_init(ruby: &Ruby) -> Result<(), Error> {
6892
6912
 
6893
6913
  class.define_method("extra_wait", method!(BrowserConfig::extra_wait, 0))?;
6894
6914
 
6895
- class.define_method("stealth", method!(BrowserConfig::stealth, 0))?;
6896
-
6897
6915
  class.define_method("proxy", method!(BrowserConfig::proxy, 0))?;
6898
6916
 
6899
6917
  class.define_method("block_url_patterns", method!(BrowserConfig::block_url_patterns, 0))?;
@@ -6973,6 +6991,10 @@ fn ruby_init(ruby: &Ruby) -> Result<(), Error> {
6973
6991
 
6974
6992
  class.define_method("capture_screenshot", method!(CrawlConfig::capture_screenshot, 0))?;
6975
6993
 
6994
+ class.define_method("follow_document_urls", method!(CrawlConfig::follow_document_urls, 0))?;
6995
+
6996
+ class.define_method("document_url_depth", method!(CrawlConfig::document_url_depth, 0))?;
6997
+
6976
6998
  class.define_method("download_documents", method!(CrawlConfig::download_documents, 0))?;
6977
6999
 
6978
7000
  class.define_method("document_max_size", method!(CrawlConfig::document_max_size, 0))?;
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:882a6f513499cb08d9cbeb641424fe238f26f3653e161241824c4ea057a44c4f
2
+ # alef:hash:4a1130eaa7f8e3312e90b66f3eb0a52e657e6378ee013ed7eeaada8817434913
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
@@ -1,10 +1,10 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:882a6f513499cb08d9cbeb641424fe238f26f3653e161241824c4ea057a44c4f
2
+ # alef:hash:4a1130eaa7f8e3312e90b66f3eb0a52e657e6378ee013ed7eeaada8817434913
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
6
6
 
7
7
  module Kreuzcrawl
8
8
  ## The version string for this package.
9
- VERSION = "0.3.0.pre.rc.55"
9
+ VERSION = "0.3.0.pre.rc.59"
10
10
  end
data/lib/kreuzcrawl.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:882a6f513499cb08d9cbeb641424fe238f26f3653e161241824c4ea057a44c4f
2
+ # alef:hash:4a1130eaa7f8e3312e90b66f3eb0a52e657e6378ee013ed7eeaada8817434913
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
  # frozen_string_literal: true
data/lib/kreuzcrawl_rb.so CHANGED
Binary file
data/sig/types.rbs CHANGED
@@ -1,5 +1,5 @@
1
1
  # This file is auto-generated by alef — DO NOT EDIT.
2
- # alef:hash:882a6f513499cb08d9cbeb641424fe238f26f3653e161241824c4ea057a44c4f
2
+ # alef:hash:4a1130eaa7f8e3312e90b66f3eb0a52e657e6378ee013ed7eeaada8817434913
3
3
  # To regenerate: alef generate
4
4
  # To verify freshness: alef verify --exit-code
5
5
 
@@ -53,7 +53,6 @@ module Kreuzcrawl
53
53
  attr_accessor wait: BrowserWait?
54
54
  attr_accessor wait_selector: String?
55
55
  attr_accessor extra_wait: Integer?
56
- attr_accessor stealth: bool?
57
56
  attr_accessor proxy: ProxyConfig?
58
57
  attr_accessor block_url_patterns: Array[String]?
59
58
  attr_accessor eval_script: String?
@@ -61,7 +60,7 @@ module Kreuzcrawl
61
60
  attr_accessor capture_network_events: bool?
62
61
  attr_accessor session_affinity: bool?
63
62
 
64
- def initialize: (?mode: BrowserMode, ?backend: BrowserBackend, ?endpoint: String, ?timeout: Integer, ?wait: BrowserWait, ?wait_selector: String, ?extra_wait: Integer, ?stealth: bool, ?proxy: ProxyConfig, ?block_url_patterns: Array[String], ?eval_script: String, ?robots_user_agent: String, ?capture_network_events: bool, ?session_affinity: bool) -> void
63
+ def initialize: (?mode: BrowserMode, ?backend: BrowserBackend, ?endpoint: String, ?timeout: Integer, ?wait: BrowserWait, ?wait_selector: String, ?extra_wait: Integer, ?proxy: ProxyConfig, ?block_url_patterns: Array[String], ?eval_script: String, ?robots_user_agent: String, ?capture_network_events: bool, ?session_affinity: bool) -> void
65
64
  def self.default: () -> BrowserConfig
66
65
  end
67
66
 
@@ -96,6 +95,8 @@ module Kreuzcrawl
96
95
  attr_accessor proxy: ProxyConfig?
97
96
  attr_accessor user_agents: Array[String]?
98
97
  attr_accessor capture_screenshot: bool?
98
+ attr_accessor follow_document_urls: bool?
99
+ attr_accessor document_url_depth: Integer?
99
100
  attr_accessor download_documents: bool?
100
101
  attr_accessor document_max_size: Integer?
101
102
  attr_accessor document_mime_types: Array[String]?
@@ -103,7 +104,7 @@ module Kreuzcrawl
103
104
  attr_accessor browser_profile: String?
104
105
  attr_accessor save_browser_profile: bool?
105
106
 
106
- def initialize: (?max_depth: Integer, ?max_pages: Integer, ?max_concurrent: Integer, ?respect_robots_txt: bool, ?soft_http_errors: bool, ?user_agent: String, ?stay_on_domain: bool, ?allow_subdomains: bool, ?include_paths: Array[String], ?exclude_paths: Array[String], ?custom_headers: Hash[String, String], ?request_timeout: Integer, ?rate_limit_ms: Integer, ?max_redirects: Integer, ?retry_count: Integer, ?retry_codes: Array[Integer], ?cookies_enabled: bool, ?auth: AuthConfig, ?max_body_size: Integer, ?remove_tags: Array[String], ?content: ContentConfig, ?map_limit: Integer, ?map_search: String, ?download_assets: bool, ?asset_types: Array[AssetCategory], ?max_asset_size: Integer, ?browser: BrowserConfig, ?proxy: ProxyConfig, ?user_agents: Array[String], ?capture_screenshot: bool, ?download_documents: bool, ?document_max_size: Integer, ?document_mime_types: Array[String], ?warc_output: String, ?browser_profile: String, ?save_browser_profile: bool) -> void
107
+ def initialize: (?max_depth: Integer, ?max_pages: Integer, ?max_concurrent: Integer, ?respect_robots_txt: bool, ?soft_http_errors: bool, ?user_agent: String, ?stay_on_domain: bool, ?allow_subdomains: bool, ?include_paths: Array[String], ?exclude_paths: Array[String], ?custom_headers: Hash[String, String], ?request_timeout: Integer, ?rate_limit_ms: Integer, ?max_redirects: Integer, ?retry_count: Integer, ?retry_codes: Array[Integer], ?cookies_enabled: bool, ?auth: AuthConfig, ?max_body_size: Integer, ?remove_tags: Array[String], ?content: ContentConfig, ?map_limit: Integer, ?map_search: String, ?download_assets: bool, ?asset_types: Array[AssetCategory], ?max_asset_size: Integer, ?browser: BrowserConfig, ?proxy: ProxyConfig, ?user_agents: Array[String], ?capture_screenshot: bool, ?follow_document_urls: bool, ?document_url_depth: Integer, ?download_documents: bool, ?document_max_size: Integer, ?document_mime_types: Array[String], ?warc_output: String, ?browser_profile: String, ?save_browser_profile: bool) -> void
107
108
  def validate: () -> void
108
109
  def self.default: () -> CrawlConfig
109
110
  end
@@ -460,7 +461,7 @@ module Kreuzcrawl
460
461
  end
461
462
 
462
463
  class BrowserMode
463
- type value = :auto | :always | :never
464
+ type value = :auto | :always | :never | :stealth
464
465
  end
465
466
 
466
467
  class BrowserWait
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kreuzcrawl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0.pre.rc.55
4
+ version: 0.3.0.pre.rc.59
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kreuzberg Team
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-06-12 00:00:00.000000000 Z
11
+ date: 2026-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rb_sys
@@ -78,6 +78,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: 3.2.0
81
+ - - "<"
82
+ - !ruby/object:Gem::Version
83
+ version: '4.0'
81
84
  required_rubygems_version: !ruby/object:Gem::Requirement
82
85
  requirements:
83
86
  - - ">="