kreuzcrawl 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/kreuzcrawl_rb/Cargo.lock +2 -2
- data/ext/kreuzcrawl_rb/Cargo.toml +1 -1
- data/ext/kreuzcrawl_rb/src/kreuzcrawl/version.rb +1 -1
- data/ext/kreuzcrawl_rb/src/lib.rs +27 -328
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a2f4027aef59737e93add85d5da2398c4a95767fb41766b86822f50199331e32
|
|
4
|
+
data.tar.gz: 82f38cf86e988321ef9bc11f5a42a023df464e79d0d0ad646edeae564cabfec6
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 1ebcebff06f45e809441c0cae1d9f14be48a79b6d1439b3421ebedd4808d9c71e6ffa86d51a6c1d64ba57a0bc2eeffa24a672a4af3db5659185ba5c8cca8443a
|
|
7
|
+
data.tar.gz: af29b39fdf985044e579d71064af9eb10dc348c81a82e7f27dc0650c4720ee1164fd8fe5529358cc9ba494b0fc296e9a425b17182794d001b3bcee526f737d35
|
|
@@ -1174,7 +1174,7 @@ dependencies = [
|
|
|
1174
1174
|
|
|
1175
1175
|
[[package]]
|
|
1176
1176
|
name = "kreuzcrawl"
|
|
1177
|
-
version = "0.1.
|
|
1177
|
+
version = "0.1.1"
|
|
1178
1178
|
dependencies = [
|
|
1179
1179
|
"ahash",
|
|
1180
1180
|
"astral-tl",
|
|
@@ -1200,7 +1200,7 @@ dependencies = [
|
|
|
1200
1200
|
|
|
1201
1201
|
[[package]]
|
|
1202
1202
|
name = "kreuzcrawl-rb"
|
|
1203
|
-
version = "0.1.
|
|
1203
|
+
version = "0.1.1"
|
|
1204
1204
|
dependencies = [
|
|
1205
1205
|
"kreuzcrawl",
|
|
1206
1206
|
"magnus",
|
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
// This file is auto-generated by alef. DO NOT EDIT.
|
|
2
2
|
// Re-generate with: alef generate
|
|
3
3
|
#![allow(dead_code)]
|
|
4
|
+
#![allow(
|
|
5
|
+
clippy::too_many_arguments,
|
|
6
|
+
clippy::let_unit_value,
|
|
7
|
+
clippy::needless_borrow,
|
|
8
|
+
clippy::map_identity,
|
|
9
|
+
clippy::just_underscores_and_digits,
|
|
10
|
+
clippy::unused_unit,
|
|
11
|
+
clippy::unnecessary_cast,
|
|
12
|
+
clippy::unwrap_or_default,
|
|
13
|
+
clippy::derivable_impls,
|
|
14
|
+
clippy::needless_borrows_for_generic_args,
|
|
15
|
+
clippy::unnecessary_fallible_conversions
|
|
16
|
+
)]
|
|
4
17
|
|
|
5
18
|
use magnus::{Error, IntoValueFromNative, Ruby, function, method, prelude::*, try_convert::TryConvertOwned};
|
|
6
19
|
use std::collections::HashMap;
|
|
@@ -243,6 +256,7 @@ pub struct CrawlConfig {
|
|
|
243
256
|
pub exclude_paths: Vec<String>,
|
|
244
257
|
pub custom_headers: HashMap<String, String>,
|
|
245
258
|
pub request_timeout: u64,
|
|
259
|
+
pub rate_limit_ms: Option<u64>,
|
|
246
260
|
pub max_redirects: usize,
|
|
247
261
|
pub retry_count: usize,
|
|
248
262
|
pub retry_codes: Vec<u16>,
|
|
@@ -322,6 +336,9 @@ impl CrawlConfig {
|
|
|
322
336
|
.get(ruby.to_symbol("request_timeout"))
|
|
323
337
|
.and_then(|v| u64::try_convert(v).ok())
|
|
324
338
|
.unwrap_or(30000),
|
|
339
|
+
rate_limit_ms: kwargs
|
|
340
|
+
.get(ruby.to_symbol("rate_limit_ms"))
|
|
341
|
+
.and_then(|v| u64::try_convert(v).ok()),
|
|
325
342
|
max_redirects: kwargs
|
|
326
343
|
.get(ruby.to_symbol("max_redirects"))
|
|
327
344
|
.and_then(|v| usize::try_convert(v).ok())
|
|
@@ -452,6 +469,10 @@ impl CrawlConfig {
|
|
|
452
469
|
self.request_timeout.clone()
|
|
453
470
|
}
|
|
454
471
|
|
|
472
|
+
fn rate_limit_ms(&self) -> Option<u64> {
|
|
473
|
+
self.rate_limit_ms
|
|
474
|
+
}
|
|
475
|
+
|
|
455
476
|
fn max_redirects(&self) -> usize {
|
|
456
477
|
self.max_redirects
|
|
457
478
|
}
|
|
@@ -558,6 +579,7 @@ impl CrawlConfig {
|
|
|
558
579
|
exclude_paths: self.exclude_paths.clone(),
|
|
559
580
|
custom_headers: self.custom_headers.clone().into_iter().collect(),
|
|
560
581
|
request_timeout: std::time::Duration::from_millis(self.request_timeout),
|
|
582
|
+
rate_limit_ms: self.rate_limit_ms,
|
|
561
583
|
max_redirects: self.max_redirects,
|
|
562
584
|
retry_count: self.retry_count,
|
|
563
585
|
retry_codes: self.retry_codes.clone(),
|
|
@@ -680,140 +702,6 @@ impl DownloadedDocument {
|
|
|
680
702
|
}
|
|
681
703
|
}
|
|
682
704
|
|
|
683
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
684
|
-
#[magnus::wrap(class = "Kreuzcrawl::InteractionResult")]
|
|
685
|
-
#[serde(default)]
|
|
686
|
-
pub struct InteractionResult {
|
|
687
|
-
pub action_results: Vec<ActionResult>,
|
|
688
|
-
pub final_html: String,
|
|
689
|
-
pub final_url: String,
|
|
690
|
-
pub screenshot: Option<Vec<u8>>,
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
unsafe impl IntoValueFromNative for InteractionResult {}
|
|
694
|
-
|
|
695
|
-
impl magnus::TryConvert for InteractionResult {
|
|
696
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
697
|
-
let r: &InteractionResult = magnus::TryConvert::try_convert(val)?;
|
|
698
|
-
Ok(r.clone())
|
|
699
|
-
}
|
|
700
|
-
}
|
|
701
|
-
unsafe impl TryConvertOwned for InteractionResult {}
|
|
702
|
-
|
|
703
|
-
impl Default for InteractionResult {
|
|
704
|
-
fn default() -> Self {
|
|
705
|
-
Self {
|
|
706
|
-
action_results: Default::default(),
|
|
707
|
-
final_html: Default::default(),
|
|
708
|
-
final_url: Default::default(),
|
|
709
|
-
screenshot: Default::default(),
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
|
|
714
|
-
impl InteractionResult {
|
|
715
|
-
fn new(
|
|
716
|
-
action_results: Option<Vec<ActionResult>>,
|
|
717
|
-
final_html: Option<String>,
|
|
718
|
-
final_url: Option<String>,
|
|
719
|
-
screenshot: Option<Vec<u8>>,
|
|
720
|
-
) -> Self {
|
|
721
|
-
Self {
|
|
722
|
-
action_results: action_results.unwrap_or_default(),
|
|
723
|
-
final_html: final_html.unwrap_or_default(),
|
|
724
|
-
final_url: final_url.unwrap_or_default(),
|
|
725
|
-
screenshot,
|
|
726
|
-
}
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
fn action_results(&self) -> Vec<ActionResult> {
|
|
730
|
-
self.action_results.clone()
|
|
731
|
-
}
|
|
732
|
-
|
|
733
|
-
fn final_html(&self) -> String {
|
|
734
|
-
self.final_html.clone()
|
|
735
|
-
}
|
|
736
|
-
|
|
737
|
-
fn final_url(&self) -> String {
|
|
738
|
-
self.final_url.clone()
|
|
739
|
-
}
|
|
740
|
-
|
|
741
|
-
fn screenshot(&self) -> Option<Vec<u8>> {
|
|
742
|
-
self.screenshot.clone()
|
|
743
|
-
}
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
747
|
-
#[magnus::wrap(class = "Kreuzcrawl::ActionResult")]
|
|
748
|
-
#[serde(default)]
|
|
749
|
-
pub struct ActionResult {
|
|
750
|
-
pub action_index: usize,
|
|
751
|
-
pub action_type: String,
|
|
752
|
-
pub success: bool,
|
|
753
|
-
pub data: Option<String>,
|
|
754
|
-
pub error: Option<String>,
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
unsafe impl IntoValueFromNative for ActionResult {}
|
|
758
|
-
|
|
759
|
-
impl magnus::TryConvert for ActionResult {
|
|
760
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
761
|
-
let r: &ActionResult = magnus::TryConvert::try_convert(val)?;
|
|
762
|
-
Ok(r.clone())
|
|
763
|
-
}
|
|
764
|
-
}
|
|
765
|
-
unsafe impl TryConvertOwned for ActionResult {}
|
|
766
|
-
|
|
767
|
-
impl Default for ActionResult {
|
|
768
|
-
fn default() -> Self {
|
|
769
|
-
Self {
|
|
770
|
-
action_index: Default::default(),
|
|
771
|
-
action_type: Default::default(),
|
|
772
|
-
success: Default::default(),
|
|
773
|
-
data: Default::default(),
|
|
774
|
-
error: Default::default(),
|
|
775
|
-
}
|
|
776
|
-
}
|
|
777
|
-
}
|
|
778
|
-
|
|
779
|
-
impl ActionResult {
|
|
780
|
-
fn new(
|
|
781
|
-
action_index: Option<usize>,
|
|
782
|
-
action_type: Option<String>,
|
|
783
|
-
success: Option<bool>,
|
|
784
|
-
data: Option<String>,
|
|
785
|
-
error: Option<String>,
|
|
786
|
-
) -> Self {
|
|
787
|
-
Self {
|
|
788
|
-
action_index: action_index.unwrap_or_default(),
|
|
789
|
-
action_type: action_type.unwrap_or_default(),
|
|
790
|
-
success: success.unwrap_or_default(),
|
|
791
|
-
data,
|
|
792
|
-
error,
|
|
793
|
-
}
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
fn action_index(&self) -> usize {
|
|
797
|
-
self.action_index
|
|
798
|
-
}
|
|
799
|
-
|
|
800
|
-
fn action_type(&self) -> String {
|
|
801
|
-
self.action_type.clone()
|
|
802
|
-
}
|
|
803
|
-
|
|
804
|
-
fn success(&self) -> bool {
|
|
805
|
-
self.success
|
|
806
|
-
}
|
|
807
|
-
|
|
808
|
-
fn data(&self) -> Option<String> {
|
|
809
|
-
self.data.clone()
|
|
810
|
-
}
|
|
811
|
-
|
|
812
|
-
fn error(&self) -> Option<String> {
|
|
813
|
-
self.error.clone()
|
|
814
|
-
}
|
|
815
|
-
}
|
|
816
|
-
|
|
817
705
|
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
818
706
|
#[magnus::wrap(class = "Kreuzcrawl::ScrapeResult")]
|
|
819
707
|
#[serde(default)]
|
|
@@ -1616,93 +1504,6 @@ impl MarkdownResult {
|
|
|
1616
1504
|
}
|
|
1617
1505
|
}
|
|
1618
1506
|
|
|
1619
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
1620
|
-
#[magnus::wrap(class = "Kreuzcrawl::CachedPage")]
|
|
1621
|
-
#[serde(default)]
|
|
1622
|
-
pub struct CachedPage {
|
|
1623
|
-
pub url: String,
|
|
1624
|
-
pub status_code: u16,
|
|
1625
|
-
pub content_type: String,
|
|
1626
|
-
pub body: String,
|
|
1627
|
-
pub etag: Option<String>,
|
|
1628
|
-
pub last_modified: Option<String>,
|
|
1629
|
-
pub cached_at: u64,
|
|
1630
|
-
}
|
|
1631
|
-
|
|
1632
|
-
unsafe impl IntoValueFromNative for CachedPage {}
|
|
1633
|
-
|
|
1634
|
-
impl magnus::TryConvert for CachedPage {
|
|
1635
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
1636
|
-
let r: &CachedPage = magnus::TryConvert::try_convert(val)?;
|
|
1637
|
-
Ok(r.clone())
|
|
1638
|
-
}
|
|
1639
|
-
}
|
|
1640
|
-
unsafe impl TryConvertOwned for CachedPage {}
|
|
1641
|
-
|
|
1642
|
-
impl Default for CachedPage {
|
|
1643
|
-
fn default() -> Self {
|
|
1644
|
-
Self {
|
|
1645
|
-
url: Default::default(),
|
|
1646
|
-
status_code: Default::default(),
|
|
1647
|
-
content_type: Default::default(),
|
|
1648
|
-
body: Default::default(),
|
|
1649
|
-
etag: Default::default(),
|
|
1650
|
-
last_modified: Default::default(),
|
|
1651
|
-
cached_at: Default::default(),
|
|
1652
|
-
}
|
|
1653
|
-
}
|
|
1654
|
-
}
|
|
1655
|
-
|
|
1656
|
-
impl CachedPage {
|
|
1657
|
-
fn new(
|
|
1658
|
-
url: Option<String>,
|
|
1659
|
-
status_code: Option<u16>,
|
|
1660
|
-
content_type: Option<String>,
|
|
1661
|
-
body: Option<String>,
|
|
1662
|
-
etag: Option<String>,
|
|
1663
|
-
last_modified: Option<String>,
|
|
1664
|
-
cached_at: Option<u64>,
|
|
1665
|
-
) -> Self {
|
|
1666
|
-
Self {
|
|
1667
|
-
url: url.unwrap_or_default(),
|
|
1668
|
-
status_code: status_code.unwrap_or_default(),
|
|
1669
|
-
content_type: content_type.unwrap_or_default(),
|
|
1670
|
-
body: body.unwrap_or_default(),
|
|
1671
|
-
etag,
|
|
1672
|
-
last_modified,
|
|
1673
|
-
cached_at: cached_at.unwrap_or_default(),
|
|
1674
|
-
}
|
|
1675
|
-
}
|
|
1676
|
-
|
|
1677
|
-
fn url(&self) -> String {
|
|
1678
|
-
self.url.clone()
|
|
1679
|
-
}
|
|
1680
|
-
|
|
1681
|
-
fn status_code(&self) -> u16 {
|
|
1682
|
-
self.status_code
|
|
1683
|
-
}
|
|
1684
|
-
|
|
1685
|
-
fn content_type(&self) -> String {
|
|
1686
|
-
self.content_type.clone()
|
|
1687
|
-
}
|
|
1688
|
-
|
|
1689
|
-
fn body(&self) -> String {
|
|
1690
|
-
self.body.clone()
|
|
1691
|
-
}
|
|
1692
|
-
|
|
1693
|
-
fn etag(&self) -> Option<String> {
|
|
1694
|
-
self.etag.clone()
|
|
1695
|
-
}
|
|
1696
|
-
|
|
1697
|
-
fn last_modified(&self) -> Option<String> {
|
|
1698
|
-
self.last_modified.clone()
|
|
1699
|
-
}
|
|
1700
|
-
|
|
1701
|
-
fn cached_at(&self) -> u64 {
|
|
1702
|
-
self.cached_at
|
|
1703
|
-
}
|
|
1704
|
-
}
|
|
1705
|
-
|
|
1706
1507
|
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
|
|
1707
1508
|
#[magnus::wrap(class = "Kreuzcrawl::LinkInfo")]
|
|
1708
1509
|
#[serde(default)]
|
|
@@ -3295,39 +3096,6 @@ impl magnus::TryConvert for AssetCategory {
|
|
|
3295
3096
|
unsafe impl IntoValueFromNative for AssetCategory {}
|
|
3296
3097
|
unsafe impl TryConvertOwned for AssetCategory {}
|
|
3297
3098
|
|
|
3298
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
3299
|
-
pub enum CrawlEvent {
|
|
3300
|
-
Page { _0: CrawlPageResult },
|
|
3301
|
-
Error { url: String, error: String },
|
|
3302
|
-
Complete { pages_crawled: usize },
|
|
3303
|
-
}
|
|
3304
|
-
|
|
3305
|
-
impl Default for CrawlEvent {
|
|
3306
|
-
fn default() -> Self {
|
|
3307
|
-
Self::Page { _0: Default::default() }
|
|
3308
|
-
}
|
|
3309
|
-
}
|
|
3310
|
-
|
|
3311
|
-
impl magnus::IntoValue for CrawlEvent {
|
|
3312
|
-
fn into_value_with(self, handle: &Ruby) -> magnus::Value {
|
|
3313
|
-
match serde_json::to_value(&self) {
|
|
3314
|
-
Ok(v) => json_to_ruby(handle, v),
|
|
3315
|
-
Err(_) => handle.qnil().into_value_with(handle),
|
|
3316
|
-
}
|
|
3317
|
-
}
|
|
3318
|
-
}
|
|
3319
|
-
|
|
3320
|
-
impl magnus::TryConvert for CrawlEvent {
|
|
3321
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
3322
|
-
let s: String = magnus::TryConvert::try_convert(val)?;
|
|
3323
|
-
serde_json::from_str(&s)
|
|
3324
|
-
.map_err(|e| magnus::Error::new(unsafe { Ruby::get_unchecked() }.exception_type_error(), e.to_string()))
|
|
3325
|
-
}
|
|
3326
|
-
}
|
|
3327
|
-
|
|
3328
|
-
unsafe impl IntoValueFromNative for CrawlEvent {}
|
|
3329
|
-
unsafe impl TryConvertOwned for CrawlEvent {}
|
|
3330
|
-
|
|
3331
3099
|
fn create_engine(config: Option<String>) -> Result<CrawlEngineHandle, Error> {
|
|
3332
3100
|
let config: Option<CrawlConfig> = config
|
|
3333
3101
|
.as_deref()
|
|
@@ -3587,6 +3355,7 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
3587
3355
|
exclude_paths: val.exclude_paths,
|
|
3588
3356
|
custom_headers: val.custom_headers.into_iter().collect(),
|
|
3589
3357
|
request_timeout: std::time::Duration::from_millis(val.request_timeout),
|
|
3358
|
+
rate_limit_ms: val.rate_limit_ms,
|
|
3590
3359
|
max_redirects: val.max_redirects,
|
|
3591
3360
|
retry_count: val.retry_count,
|
|
3592
3361
|
retry_codes: val.retry_codes,
|
|
@@ -3629,6 +3398,7 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
|
|
|
3629
3398
|
exclude_paths: val.exclude_paths,
|
|
3630
3399
|
custom_headers: val.custom_headers.into_iter().collect(),
|
|
3631
3400
|
request_timeout: val.request_timeout.as_millis() as u64,
|
|
3401
|
+
rate_limit_ms: val.rate_limit_ms,
|
|
3632
3402
|
max_redirects: val.max_redirects,
|
|
3633
3403
|
retry_count: val.retry_count,
|
|
3634
3404
|
retry_codes: val.retry_codes,
|
|
@@ -3677,40 +3447,17 @@ impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
|
|
|
3677
3447
|
mime_type: format!("{:?}", val.mime_type),
|
|
3678
3448
|
content: val.content.to_vec(),
|
|
3679
3449
|
size: val.size,
|
|
3680
|
-
filename: val.filename.as_ref().map(|v| format!("{:?}"
|
|
3450
|
+
filename: val.filename.as_ref().map(|v| format!("{v:?}")),
|
|
3681
3451
|
content_hash: format!("{:?}", val.content_hash),
|
|
3682
3452
|
headers: val
|
|
3683
3453
|
.headers
|
|
3684
3454
|
.into_iter()
|
|
3685
|
-
.map(|(k, v)| (
|
|
3455
|
+
.map(|(k, v)| (k.to_string(), v.to_string()))
|
|
3686
3456
|
.collect(),
|
|
3687
3457
|
}
|
|
3688
3458
|
}
|
|
3689
3459
|
}
|
|
3690
3460
|
|
|
3691
|
-
impl From<kreuzcrawl::InteractionResult> for InteractionResult {
|
|
3692
|
-
fn from(val: kreuzcrawl::InteractionResult) -> Self {
|
|
3693
|
-
Self {
|
|
3694
|
-
action_results: val.action_results.into_iter().map(Into::into).collect(),
|
|
3695
|
-
final_html: val.final_html,
|
|
3696
|
-
final_url: val.final_url,
|
|
3697
|
-
screenshot: val.screenshot.map(|v| v.to_vec()),
|
|
3698
|
-
}
|
|
3699
|
-
}
|
|
3700
|
-
}
|
|
3701
|
-
|
|
3702
|
-
impl From<kreuzcrawl::ActionResult> for ActionResult {
|
|
3703
|
-
fn from(val: kreuzcrawl::ActionResult) -> Self {
|
|
3704
|
-
Self {
|
|
3705
|
-
action_index: val.action_index,
|
|
3706
|
-
action_type: format!("{:?}", val.action_type),
|
|
3707
|
-
success: val.success,
|
|
3708
|
-
data: val.data.as_ref().map(ToString::to_string),
|
|
3709
|
-
error: val.error,
|
|
3710
|
-
}
|
|
3711
|
-
}
|
|
3712
|
-
}
|
|
3713
|
-
|
|
3714
3461
|
impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
3715
3462
|
fn from(val: ScrapeResult) -> Self {
|
|
3716
3463
|
Self {
|
|
@@ -3934,20 +3681,6 @@ impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
|
|
|
3934
3681
|
}
|
|
3935
3682
|
}
|
|
3936
3683
|
|
|
3937
|
-
impl From<kreuzcrawl::CachedPage> for CachedPage {
|
|
3938
|
-
fn from(val: kreuzcrawl::CachedPage) -> Self {
|
|
3939
|
-
Self {
|
|
3940
|
-
url: val.url,
|
|
3941
|
-
status_code: val.status_code,
|
|
3942
|
-
content_type: val.content_type,
|
|
3943
|
-
body: val.body,
|
|
3944
|
-
etag: val.etag,
|
|
3945
|
-
last_modified: val.last_modified,
|
|
3946
|
-
cached_at: val.cached_at,
|
|
3947
|
-
}
|
|
3948
|
-
}
|
|
3949
|
-
}
|
|
3950
|
-
|
|
3951
3684
|
impl From<LinkInfo> for kreuzcrawl::LinkInfo {
|
|
3952
3685
|
fn from(val: LinkInfo) -> Self {
|
|
3953
3686
|
Self {
|
|
@@ -4530,16 +4263,6 @@ impl From<kreuzcrawl::AssetCategory> for AssetCategory {
|
|
|
4530
4263
|
}
|
|
4531
4264
|
}
|
|
4532
4265
|
|
|
4533
|
-
impl From<kreuzcrawl::CrawlEvent> for CrawlEvent {
|
|
4534
|
-
fn from(val: kreuzcrawl::CrawlEvent) -> Self {
|
|
4535
|
-
match val {
|
|
4536
|
-
kreuzcrawl::CrawlEvent::Page(_0) => Self::Page { _0: (*_0).into() },
|
|
4537
|
-
kreuzcrawl::CrawlEvent::Error { url, error } => Self::Error { url, error },
|
|
4538
|
-
kreuzcrawl::CrawlEvent::Complete { pages_crawled } => Self::Complete { pages_crawled },
|
|
4539
|
-
}
|
|
4540
|
-
}
|
|
4541
|
-
}
|
|
4542
|
-
|
|
4543
4266
|
/// Convert a `kreuzcrawl::CrawlError` error to a Magnus runtime error.
|
|
4544
4267
|
#[allow(dead_code)]
|
|
4545
4268
|
fn crawl_error_to_magnus_err(e: kreuzcrawl::CrawlError) -> magnus::Error {
|
|
@@ -4587,6 +4310,7 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4587
4310
|
class.define_method("exclude_paths", method!(CrawlConfig::exclude_paths, 0))?;
|
|
4588
4311
|
class.define_method("custom_headers", method!(CrawlConfig::custom_headers, 0))?;
|
|
4589
4312
|
class.define_method("request_timeout", method!(CrawlConfig::request_timeout, 0))?;
|
|
4313
|
+
class.define_method("rate_limit_ms", method!(CrawlConfig::rate_limit_ms, 0))?;
|
|
4590
4314
|
class.define_method("max_redirects", method!(CrawlConfig::max_redirects, 0))?;
|
|
4591
4315
|
class.define_method("retry_count", method!(CrawlConfig::retry_count, 0))?;
|
|
4592
4316
|
class.define_method("retry_codes", method!(CrawlConfig::retry_codes, 0))?;
|
|
@@ -4622,21 +4346,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4622
4346
|
class.define_method("content_hash", method!(DownloadedDocument::content_hash, 0))?;
|
|
4623
4347
|
class.define_method("headers", method!(DownloadedDocument::headers, 0))?;
|
|
4624
4348
|
|
|
4625
|
-
let class = module.define_class("InteractionResult", ruby.class_object())?;
|
|
4626
|
-
class.define_singleton_method("new", function!(InteractionResult::new, 4))?;
|
|
4627
|
-
class.define_method("action_results", method!(InteractionResult::action_results, 0))?;
|
|
4628
|
-
class.define_method("final_html", method!(InteractionResult::final_html, 0))?;
|
|
4629
|
-
class.define_method("final_url", method!(InteractionResult::final_url, 0))?;
|
|
4630
|
-
class.define_method("screenshot", method!(InteractionResult::screenshot, 0))?;
|
|
4631
|
-
|
|
4632
|
-
let class = module.define_class("ActionResult", ruby.class_object())?;
|
|
4633
|
-
class.define_singleton_method("new", function!(ActionResult::new, 5))?;
|
|
4634
|
-
class.define_method("action_index", method!(ActionResult::action_index, 0))?;
|
|
4635
|
-
class.define_method("action_type", method!(ActionResult::action_type, 0))?;
|
|
4636
|
-
class.define_method("success", method!(ActionResult::success, 0))?;
|
|
4637
|
-
class.define_method("data", method!(ActionResult::data, 0))?;
|
|
4638
|
-
class.define_method("error", method!(ActionResult::error, 0))?;
|
|
4639
|
-
|
|
4640
4349
|
let class = module.define_class("ScrapeResult", ruby.class_object())?;
|
|
4641
4350
|
class.define_singleton_method("new", function!(ScrapeResult::new, 1))?;
|
|
4642
4351
|
class.define_method("status_code", method!(ScrapeResult::status_code, 0))?;
|
|
@@ -4725,16 +4434,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4725
4434
|
class.define_method("citations", method!(MarkdownResult::citations, 0))?;
|
|
4726
4435
|
class.define_method("fit_content", method!(MarkdownResult::fit_content, 0))?;
|
|
4727
4436
|
|
|
4728
|
-
let class = module.define_class("CachedPage", ruby.class_object())?;
|
|
4729
|
-
class.define_singleton_method("new", function!(CachedPage::new, 7))?;
|
|
4730
|
-
class.define_method("url", method!(CachedPage::url, 0))?;
|
|
4731
|
-
class.define_method("status_code", method!(CachedPage::status_code, 0))?;
|
|
4732
|
-
class.define_method("content_type", method!(CachedPage::content_type, 0))?;
|
|
4733
|
-
class.define_method("body", method!(CachedPage::body, 0))?;
|
|
4734
|
-
class.define_method("etag", method!(CachedPage::etag, 0))?;
|
|
4735
|
-
class.define_method("last_modified", method!(CachedPage::last_modified, 0))?;
|
|
4736
|
-
class.define_method("cached_at", method!(CachedPage::cached_at, 0))?;
|
|
4737
|
-
|
|
4738
4437
|
let class = module.define_class("LinkInfo", ruby.class_object())?;
|
|
4739
4438
|
class.define_singleton_method("new", function!(LinkInfo::new, 5))?;
|
|
4740
4439
|
class.define_method("url", method!(LinkInfo::url, 0))?;
|