kreuzcrawl 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/kreuzcrawl_rb/Cargo.lock +2 -2
- data/ext/kreuzcrawl_rb/Cargo.toml +1 -1
- data/ext/kreuzcrawl_rb/src/kreuzcrawl/version.rb +1 -1
- data/ext/kreuzcrawl_rb/src/lib.rs +223 -354
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 92e6cb8be4ef5d12847d6d632465cf41b18ce6564652049052b8793162204255
|
|
4
|
+
data.tar.gz: 9bd7b541b57c35b0ddb1cec5d6b119463ca1622db1f57455b340f38b4cf3abd3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 78ae3990a646e6f0a9ecfdb8355d4bfb11f476d3c0dc498651e5ebeb08f28dd43dd848178c43ee4f5788517e0014b3b20f8827990abe9e8e3377cb43c27be773
|
|
7
|
+
data.tar.gz: f43d72f320847d7bf872dc7d59d8cf816bf1c2faf05528998773dbd55ab414dd35380e190f7a0f64eb0e60eae4866c22263eceb0ba05da02a47ad5e98aecbeeb
|
|
@@ -1174,7 +1174,7 @@ dependencies = [
|
|
|
1174
1174
|
|
|
1175
1175
|
[[package]]
|
|
1176
1176
|
name = "kreuzcrawl"
|
|
1177
|
-
version = "0.1.
|
|
1177
|
+
version = "0.1.1"
|
|
1178
1178
|
dependencies = [
|
|
1179
1179
|
"ahash",
|
|
1180
1180
|
"astral-tl",
|
|
@@ -1200,7 +1200,7 @@ dependencies = [
|
|
|
1200
1200
|
|
|
1201
1201
|
[[package]]
|
|
1202
1202
|
name = "kreuzcrawl-rb"
|
|
1203
|
-
version = "0.1.
|
|
1203
|
+
version = "0.1.1"
|
|
1204
1204
|
dependencies = [
|
|
1205
1205
|
"kreuzcrawl",
|
|
1206
1206
|
"magnus",
|
|
@@ -1,6 +1,19 @@
|
|
|
1
1
|
// This file is auto-generated by alef. DO NOT EDIT.
|
|
2
2
|
// Re-generate with: alef generate
|
|
3
3
|
#![allow(dead_code)]
|
|
4
|
+
#![allow(
|
|
5
|
+
clippy::too_many_arguments,
|
|
6
|
+
clippy::let_unit_value,
|
|
7
|
+
clippy::needless_borrow,
|
|
8
|
+
clippy::map_identity,
|
|
9
|
+
clippy::just_underscores_and_digits,
|
|
10
|
+
clippy::unused_unit,
|
|
11
|
+
clippy::unnecessary_cast,
|
|
12
|
+
clippy::unwrap_or_default,
|
|
13
|
+
clippy::derivable_impls,
|
|
14
|
+
clippy::needless_borrows_for_generic_args,
|
|
15
|
+
clippy::unnecessary_fallible_conversions
|
|
16
|
+
)]
|
|
4
17
|
|
|
5
18
|
use magnus::{Error, IntoValueFromNative, Ruby, function, method, prelude::*, try_convert::TryConvertOwned};
|
|
6
19
|
use std::collections::HashMap;
|
|
@@ -162,6 +175,133 @@ impl ProxyConfig {
|
|
|
162
175
|
}
|
|
163
176
|
}
|
|
164
177
|
|
|
178
|
+
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
179
|
+
#[magnus::wrap(class = "Kreuzcrawl::ContentConfig")]
|
|
180
|
+
#[serde(default)]
|
|
181
|
+
pub struct ContentConfig {
|
|
182
|
+
pub output_format: String,
|
|
183
|
+
pub preprocessing_preset: String,
|
|
184
|
+
pub remove_navigation: bool,
|
|
185
|
+
pub remove_forms: bool,
|
|
186
|
+
pub strip_tags: Vec<String>,
|
|
187
|
+
pub preserve_tags: Vec<String>,
|
|
188
|
+
pub exclude_selectors: Vec<String>,
|
|
189
|
+
pub skip_images: bool,
|
|
190
|
+
pub max_depth: Option<usize>,
|
|
191
|
+
pub wrap: bool,
|
|
192
|
+
pub wrap_width: usize,
|
|
193
|
+
pub include_document_structure: bool,
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
unsafe impl IntoValueFromNative for ContentConfig {}
|
|
197
|
+
|
|
198
|
+
impl magnus::TryConvert for ContentConfig {
|
|
199
|
+
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
200
|
+
let r: &ContentConfig = magnus::TryConvert::try_convert(val)?;
|
|
201
|
+
Ok(r.clone())
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
unsafe impl TryConvertOwned for ContentConfig {}
|
|
205
|
+
|
|
206
|
+
impl Default for ContentConfig {
|
|
207
|
+
fn default() -> Self {
|
|
208
|
+
Self {
|
|
209
|
+
output_format: Default::default(),
|
|
210
|
+
preprocessing_preset: Default::default(),
|
|
211
|
+
remove_navigation: Default::default(),
|
|
212
|
+
remove_forms: Default::default(),
|
|
213
|
+
strip_tags: Default::default(),
|
|
214
|
+
preserve_tags: Default::default(),
|
|
215
|
+
exclude_selectors: Default::default(),
|
|
216
|
+
skip_images: Default::default(),
|
|
217
|
+
max_depth: Default::default(),
|
|
218
|
+
wrap: Default::default(),
|
|
219
|
+
wrap_width: Default::default(),
|
|
220
|
+
include_document_structure: Default::default(),
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
|
|
225
|
+
impl ContentConfig {
|
|
226
|
+
fn new(
|
|
227
|
+
output_format: Option<String>,
|
|
228
|
+
preprocessing_preset: Option<String>,
|
|
229
|
+
remove_navigation: Option<bool>,
|
|
230
|
+
remove_forms: Option<bool>,
|
|
231
|
+
strip_tags: Option<Vec<String>>,
|
|
232
|
+
preserve_tags: Option<Vec<String>>,
|
|
233
|
+
exclude_selectors: Option<Vec<String>>,
|
|
234
|
+
skip_images: Option<bool>,
|
|
235
|
+
max_depth: Option<usize>,
|
|
236
|
+
wrap: Option<bool>,
|
|
237
|
+
wrap_width: Option<usize>,
|
|
238
|
+
include_document_structure: Option<bool>,
|
|
239
|
+
) -> Self {
|
|
240
|
+
Self {
|
|
241
|
+
output_format: output_format.unwrap_or("markdown".to_string()),
|
|
242
|
+
preprocessing_preset: preprocessing_preset.unwrap_or("standard".to_string()),
|
|
243
|
+
remove_navigation: remove_navigation.unwrap_or(true),
|
|
244
|
+
remove_forms: remove_forms.unwrap_or(true),
|
|
245
|
+
strip_tags: strip_tags.unwrap_or_default(),
|
|
246
|
+
preserve_tags: preserve_tags.unwrap_or_default(),
|
|
247
|
+
exclude_selectors: exclude_selectors.unwrap_or_default(),
|
|
248
|
+
skip_images: skip_images.unwrap_or(false),
|
|
249
|
+
max_depth,
|
|
250
|
+
wrap: wrap.unwrap_or(false),
|
|
251
|
+
wrap_width: wrap_width.unwrap_or(80),
|
|
252
|
+
include_document_structure: include_document_structure.unwrap_or(true),
|
|
253
|
+
}
|
|
254
|
+
}
|
|
255
|
+
|
|
256
|
+
fn output_format(&self) -> String {
|
|
257
|
+
self.output_format.clone()
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
fn preprocessing_preset(&self) -> String {
|
|
261
|
+
self.preprocessing_preset.clone()
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
fn remove_navigation(&self) -> bool {
|
|
265
|
+
self.remove_navigation
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
fn remove_forms(&self) -> bool {
|
|
269
|
+
self.remove_forms
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
fn strip_tags(&self) -> Vec<String> {
|
|
273
|
+
self.strip_tags.clone()
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
fn preserve_tags(&self) -> Vec<String> {
|
|
277
|
+
self.preserve_tags.clone()
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
fn exclude_selectors(&self) -> Vec<String> {
|
|
281
|
+
self.exclude_selectors.clone()
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
fn skip_images(&self) -> bool {
|
|
285
|
+
self.skip_images
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
fn max_depth(&self) -> Option<usize> {
|
|
289
|
+
self.max_depth
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
fn wrap(&self) -> bool {
|
|
293
|
+
self.wrap
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
fn wrap_width(&self) -> usize {
|
|
297
|
+
self.wrap_width
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
fn include_document_structure(&self) -> bool {
|
|
301
|
+
self.include_document_structure
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
165
305
|
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
|
|
166
306
|
#[magnus::wrap(class = "Kreuzcrawl::BrowserConfig")]
|
|
167
307
|
#[serde(default)]
|
|
@@ -243,14 +383,15 @@ pub struct CrawlConfig {
|
|
|
243
383
|
pub exclude_paths: Vec<String>,
|
|
244
384
|
pub custom_headers: HashMap<String, String>,
|
|
245
385
|
pub request_timeout: u64,
|
|
386
|
+
pub rate_limit_ms: Option<u64>,
|
|
246
387
|
pub max_redirects: usize,
|
|
247
388
|
pub retry_count: usize,
|
|
248
389
|
pub retry_codes: Vec<u16>,
|
|
249
390
|
pub cookies_enabled: bool,
|
|
250
391
|
pub auth: Option<AuthConfig>,
|
|
251
392
|
pub max_body_size: Option<usize>,
|
|
252
|
-
pub main_content_only: bool,
|
|
253
393
|
pub remove_tags: Vec<String>,
|
|
394
|
+
pub content: ContentConfig,
|
|
254
395
|
pub map_limit: Option<usize>,
|
|
255
396
|
pub map_search: Option<String>,
|
|
256
397
|
pub download_assets: bool,
|
|
@@ -322,6 +463,9 @@ impl CrawlConfig {
|
|
|
322
463
|
.get(ruby.to_symbol("request_timeout"))
|
|
323
464
|
.and_then(|v| u64::try_convert(v).ok())
|
|
324
465
|
.unwrap_or(30000),
|
|
466
|
+
rate_limit_ms: kwargs
|
|
467
|
+
.get(ruby.to_symbol("rate_limit_ms"))
|
|
468
|
+
.and_then(|v| u64::try_convert(v).ok()),
|
|
325
469
|
max_redirects: kwargs
|
|
326
470
|
.get(ruby.to_symbol("max_redirects"))
|
|
327
471
|
.and_then(|v| usize::try_convert(v).ok())
|
|
@@ -344,14 +488,14 @@ impl CrawlConfig {
|
|
|
344
488
|
max_body_size: kwargs
|
|
345
489
|
.get(ruby.to_symbol("max_body_size"))
|
|
346
490
|
.and_then(|v| usize::try_convert(v).ok()),
|
|
347
|
-
main_content_only: kwargs
|
|
348
|
-
.get(ruby.to_symbol("main_content_only"))
|
|
349
|
-
.and_then(|v| bool::try_convert(v).ok())
|
|
350
|
-
.unwrap_or(false),
|
|
351
491
|
remove_tags: kwargs
|
|
352
492
|
.get(ruby.to_symbol("remove_tags"))
|
|
353
493
|
.and_then(|v| <Vec<String>>::try_convert(v).ok())
|
|
354
494
|
.unwrap_or_default(),
|
|
495
|
+
content: kwargs
|
|
496
|
+
.get(ruby.to_symbol("content"))
|
|
497
|
+
.and_then(|v| ContentConfig::try_convert(v).ok())
|
|
498
|
+
.unwrap_or_default(),
|
|
355
499
|
map_limit: kwargs
|
|
356
500
|
.get(ruby.to_symbol("map_limit"))
|
|
357
501
|
.and_then(|v| usize::try_convert(v).ok()),
|
|
@@ -452,6 +596,10 @@ impl CrawlConfig {
|
|
|
452
596
|
self.request_timeout.clone()
|
|
453
597
|
}
|
|
454
598
|
|
|
599
|
+
fn rate_limit_ms(&self) -> Option<u64> {
|
|
600
|
+
self.rate_limit_ms
|
|
601
|
+
}
|
|
602
|
+
|
|
455
603
|
fn max_redirects(&self) -> usize {
|
|
456
604
|
self.max_redirects
|
|
457
605
|
}
|
|
@@ -476,14 +624,14 @@ impl CrawlConfig {
|
|
|
476
624
|
self.max_body_size
|
|
477
625
|
}
|
|
478
626
|
|
|
479
|
-
fn main_content_only(&self) -> bool {
|
|
480
|
-
self.main_content_only
|
|
481
|
-
}
|
|
482
|
-
|
|
483
627
|
fn remove_tags(&self) -> Vec<String> {
|
|
484
628
|
self.remove_tags.clone()
|
|
485
629
|
}
|
|
486
630
|
|
|
631
|
+
fn content(&self) -> ContentConfig {
|
|
632
|
+
self.content.clone()
|
|
633
|
+
}
|
|
634
|
+
|
|
487
635
|
fn map_limit(&self) -> Option<usize> {
|
|
488
636
|
self.map_limit
|
|
489
637
|
}
|
|
@@ -558,14 +706,15 @@ impl CrawlConfig {
|
|
|
558
706
|
exclude_paths: self.exclude_paths.clone(),
|
|
559
707
|
custom_headers: self.custom_headers.clone().into_iter().collect(),
|
|
560
708
|
request_timeout: std::time::Duration::from_millis(self.request_timeout),
|
|
709
|
+
rate_limit_ms: self.rate_limit_ms,
|
|
561
710
|
max_redirects: self.max_redirects,
|
|
562
711
|
retry_count: self.retry_count,
|
|
563
712
|
retry_codes: self.retry_codes.clone(),
|
|
564
713
|
cookies_enabled: self.cookies_enabled,
|
|
565
714
|
auth: self.auth.clone().map(Into::into),
|
|
566
715
|
max_body_size: self.max_body_size,
|
|
567
|
-
main_content_only: self.main_content_only,
|
|
568
716
|
remove_tags: self.remove_tags.clone(),
|
|
717
|
+
content: self.content.clone().into(),
|
|
569
718
|
map_limit: self.map_limit,
|
|
570
719
|
map_search: self.map_search.clone(),
|
|
571
720
|
download_assets: self.download_assets,
|
|
@@ -680,140 +829,6 @@ impl DownloadedDocument {
|
|
|
680
829
|
}
|
|
681
830
|
}
|
|
682
831
|
|
|
683
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
684
|
-
#[magnus::wrap(class = "Kreuzcrawl::InteractionResult")]
|
|
685
|
-
#[serde(default)]
|
|
686
|
-
pub struct InteractionResult {
|
|
687
|
-
pub action_results: Vec<ActionResult>,
|
|
688
|
-
pub final_html: String,
|
|
689
|
-
pub final_url: String,
|
|
690
|
-
pub screenshot: Option<Vec<u8>>,
|
|
691
|
-
}
|
|
692
|
-
|
|
693
|
-
unsafe impl IntoValueFromNative for InteractionResult {}
|
|
694
|
-
|
|
695
|
-
impl magnus::TryConvert for InteractionResult {
|
|
696
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
697
|
-
let r: &InteractionResult = magnus::TryConvert::try_convert(val)?;
|
|
698
|
-
Ok(r.clone())
|
|
699
|
-
}
|
|
700
|
-
}
|
|
701
|
-
unsafe impl TryConvertOwned for InteractionResult {}
|
|
702
|
-
|
|
703
|
-
impl Default for InteractionResult {
|
|
704
|
-
fn default() -> Self {
|
|
705
|
-
Self {
|
|
706
|
-
action_results: Default::default(),
|
|
707
|
-
final_html: Default::default(),
|
|
708
|
-
final_url: Default::default(),
|
|
709
|
-
screenshot: Default::default(),
|
|
710
|
-
}
|
|
711
|
-
}
|
|
712
|
-
}
|
|
713
|
-
|
|
714
|
-
impl InteractionResult {
|
|
715
|
-
fn new(
|
|
716
|
-
action_results: Option<Vec<ActionResult>>,
|
|
717
|
-
final_html: Option<String>,
|
|
718
|
-
final_url: Option<String>,
|
|
719
|
-
screenshot: Option<Vec<u8>>,
|
|
720
|
-
) -> Self {
|
|
721
|
-
Self {
|
|
722
|
-
action_results: action_results.unwrap_or_default(),
|
|
723
|
-
final_html: final_html.unwrap_or_default(),
|
|
724
|
-
final_url: final_url.unwrap_or_default(),
|
|
725
|
-
screenshot,
|
|
726
|
-
}
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
fn action_results(&self) -> Vec<ActionResult> {
|
|
730
|
-
self.action_results.clone()
|
|
731
|
-
}
|
|
732
|
-
|
|
733
|
-
fn final_html(&self) -> String {
|
|
734
|
-
self.final_html.clone()
|
|
735
|
-
}
|
|
736
|
-
|
|
737
|
-
fn final_url(&self) -> String {
|
|
738
|
-
self.final_url.clone()
|
|
739
|
-
}
|
|
740
|
-
|
|
741
|
-
fn screenshot(&self) -> Option<Vec<u8>> {
|
|
742
|
-
self.screenshot.clone()
|
|
743
|
-
}
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
747
|
-
#[magnus::wrap(class = "Kreuzcrawl::ActionResult")]
|
|
748
|
-
#[serde(default)]
|
|
749
|
-
pub struct ActionResult {
|
|
750
|
-
pub action_index: usize,
|
|
751
|
-
pub action_type: String,
|
|
752
|
-
pub success: bool,
|
|
753
|
-
pub data: Option<String>,
|
|
754
|
-
pub error: Option<String>,
|
|
755
|
-
}
|
|
756
|
-
|
|
757
|
-
unsafe impl IntoValueFromNative for ActionResult {}
|
|
758
|
-
|
|
759
|
-
impl magnus::TryConvert for ActionResult {
|
|
760
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
761
|
-
let r: &ActionResult = magnus::TryConvert::try_convert(val)?;
|
|
762
|
-
Ok(r.clone())
|
|
763
|
-
}
|
|
764
|
-
}
|
|
765
|
-
unsafe impl TryConvertOwned for ActionResult {}
|
|
766
|
-
|
|
767
|
-
impl Default for ActionResult {
|
|
768
|
-
fn default() -> Self {
|
|
769
|
-
Self {
|
|
770
|
-
action_index: Default::default(),
|
|
771
|
-
action_type: Default::default(),
|
|
772
|
-
success: Default::default(),
|
|
773
|
-
data: Default::default(),
|
|
774
|
-
error: Default::default(),
|
|
775
|
-
}
|
|
776
|
-
}
|
|
777
|
-
}
|
|
778
|
-
|
|
779
|
-
impl ActionResult {
|
|
780
|
-
fn new(
|
|
781
|
-
action_index: Option<usize>,
|
|
782
|
-
action_type: Option<String>,
|
|
783
|
-
success: Option<bool>,
|
|
784
|
-
data: Option<String>,
|
|
785
|
-
error: Option<String>,
|
|
786
|
-
) -> Self {
|
|
787
|
-
Self {
|
|
788
|
-
action_index: action_index.unwrap_or_default(),
|
|
789
|
-
action_type: action_type.unwrap_or_default(),
|
|
790
|
-
success: success.unwrap_or_default(),
|
|
791
|
-
data,
|
|
792
|
-
error,
|
|
793
|
-
}
|
|
794
|
-
}
|
|
795
|
-
|
|
796
|
-
fn action_index(&self) -> usize {
|
|
797
|
-
self.action_index
|
|
798
|
-
}
|
|
799
|
-
|
|
800
|
-
fn action_type(&self) -> String {
|
|
801
|
-
self.action_type.clone()
|
|
802
|
-
}
|
|
803
|
-
|
|
804
|
-
fn success(&self) -> bool {
|
|
805
|
-
self.success
|
|
806
|
-
}
|
|
807
|
-
|
|
808
|
-
fn data(&self) -> Option<String> {
|
|
809
|
-
self.data.clone()
|
|
810
|
-
}
|
|
811
|
-
|
|
812
|
-
fn error(&self) -> Option<String> {
|
|
813
|
-
self.error.clone()
|
|
814
|
-
}
|
|
815
|
-
}
|
|
816
|
-
|
|
817
832
|
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
818
833
|
#[magnus::wrap(class = "Kreuzcrawl::ScrapeResult")]
|
|
819
834
|
#[serde(default)]
|
|
@@ -835,7 +850,6 @@ pub struct ScrapeResult {
|
|
|
835
850
|
pub is_pdf: bool,
|
|
836
851
|
pub was_skipped: bool,
|
|
837
852
|
pub detected_charset: Option<String>,
|
|
838
|
-
pub main_content_only: bool,
|
|
839
853
|
pub auth_header_sent: bool,
|
|
840
854
|
pub response_meta: Option<ResponseMeta>,
|
|
841
855
|
pub assets: Vec<DownloadedAsset>,
|
|
@@ -878,7 +892,6 @@ impl Default for ScrapeResult {
|
|
|
878
892
|
is_pdf: Default::default(),
|
|
879
893
|
was_skipped: Default::default(),
|
|
880
894
|
detected_charset: Default::default(),
|
|
881
|
-
main_content_only: Default::default(),
|
|
882
895
|
auth_header_sent: Default::default(),
|
|
883
896
|
response_meta: Default::default(),
|
|
884
897
|
assets: Default::default(),
|
|
@@ -962,10 +975,6 @@ impl ScrapeResult {
|
|
|
962
975
|
detected_charset: kwargs
|
|
963
976
|
.get(ruby.to_symbol("detected_charset"))
|
|
964
977
|
.and_then(|v| String::try_convert(v).ok()),
|
|
965
|
-
main_content_only: kwargs
|
|
966
|
-
.get(ruby.to_symbol("main_content_only"))
|
|
967
|
-
.and_then(|v| bool::try_convert(v).ok())
|
|
968
|
-
.unwrap_or_default(),
|
|
969
978
|
auth_header_sent: kwargs
|
|
970
979
|
.get(ruby.to_symbol("auth_header_sent"))
|
|
971
980
|
.and_then(|v| bool::try_convert(v).ok())
|
|
@@ -1071,10 +1080,6 @@ impl ScrapeResult {
|
|
|
1071
1080
|
self.detected_charset.clone()
|
|
1072
1081
|
}
|
|
1073
1082
|
|
|
1074
|
-
fn main_content_only(&self) -> bool {
|
|
1075
|
-
self.main_content_only
|
|
1076
|
-
}
|
|
1077
|
-
|
|
1078
1083
|
fn auth_header_sent(&self) -> bool {
|
|
1079
1084
|
self.auth_header_sent
|
|
1080
1085
|
}
|
|
@@ -1616,93 +1621,6 @@ impl MarkdownResult {
|
|
|
1616
1621
|
}
|
|
1617
1622
|
}
|
|
1618
1623
|
|
|
1619
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
1620
|
-
#[magnus::wrap(class = "Kreuzcrawl::CachedPage")]
|
|
1621
|
-
#[serde(default)]
|
|
1622
|
-
pub struct CachedPage {
|
|
1623
|
-
pub url: String,
|
|
1624
|
-
pub status_code: u16,
|
|
1625
|
-
pub content_type: String,
|
|
1626
|
-
pub body: String,
|
|
1627
|
-
pub etag: Option<String>,
|
|
1628
|
-
pub last_modified: Option<String>,
|
|
1629
|
-
pub cached_at: u64,
|
|
1630
|
-
}
|
|
1631
|
-
|
|
1632
|
-
unsafe impl IntoValueFromNative for CachedPage {}
|
|
1633
|
-
|
|
1634
|
-
impl magnus::TryConvert for CachedPage {
|
|
1635
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
1636
|
-
let r: &CachedPage = magnus::TryConvert::try_convert(val)?;
|
|
1637
|
-
Ok(r.clone())
|
|
1638
|
-
}
|
|
1639
|
-
}
|
|
1640
|
-
unsafe impl TryConvertOwned for CachedPage {}
|
|
1641
|
-
|
|
1642
|
-
impl Default for CachedPage {
|
|
1643
|
-
fn default() -> Self {
|
|
1644
|
-
Self {
|
|
1645
|
-
url: Default::default(),
|
|
1646
|
-
status_code: Default::default(),
|
|
1647
|
-
content_type: Default::default(),
|
|
1648
|
-
body: Default::default(),
|
|
1649
|
-
etag: Default::default(),
|
|
1650
|
-
last_modified: Default::default(),
|
|
1651
|
-
cached_at: Default::default(),
|
|
1652
|
-
}
|
|
1653
|
-
}
|
|
1654
|
-
}
|
|
1655
|
-
|
|
1656
|
-
impl CachedPage {
|
|
1657
|
-
fn new(
|
|
1658
|
-
url: Option<String>,
|
|
1659
|
-
status_code: Option<u16>,
|
|
1660
|
-
content_type: Option<String>,
|
|
1661
|
-
body: Option<String>,
|
|
1662
|
-
etag: Option<String>,
|
|
1663
|
-
last_modified: Option<String>,
|
|
1664
|
-
cached_at: Option<u64>,
|
|
1665
|
-
) -> Self {
|
|
1666
|
-
Self {
|
|
1667
|
-
url: url.unwrap_or_default(),
|
|
1668
|
-
status_code: status_code.unwrap_or_default(),
|
|
1669
|
-
content_type: content_type.unwrap_or_default(),
|
|
1670
|
-
body: body.unwrap_or_default(),
|
|
1671
|
-
etag,
|
|
1672
|
-
last_modified,
|
|
1673
|
-
cached_at: cached_at.unwrap_or_default(),
|
|
1674
|
-
}
|
|
1675
|
-
}
|
|
1676
|
-
|
|
1677
|
-
fn url(&self) -> String {
|
|
1678
|
-
self.url.clone()
|
|
1679
|
-
}
|
|
1680
|
-
|
|
1681
|
-
fn status_code(&self) -> u16 {
|
|
1682
|
-
self.status_code
|
|
1683
|
-
}
|
|
1684
|
-
|
|
1685
|
-
fn content_type(&self) -> String {
|
|
1686
|
-
self.content_type.clone()
|
|
1687
|
-
}
|
|
1688
|
-
|
|
1689
|
-
fn body(&self) -> String {
|
|
1690
|
-
self.body.clone()
|
|
1691
|
-
}
|
|
1692
|
-
|
|
1693
|
-
fn etag(&self) -> Option<String> {
|
|
1694
|
-
self.etag.clone()
|
|
1695
|
-
}
|
|
1696
|
-
|
|
1697
|
-
fn last_modified(&self) -> Option<String> {
|
|
1698
|
-
self.last_modified.clone()
|
|
1699
|
-
}
|
|
1700
|
-
|
|
1701
|
-
fn cached_at(&self) -> u64 {
|
|
1702
|
-
self.cached_at
|
|
1703
|
-
}
|
|
1704
|
-
}
|
|
1705
|
-
|
|
1706
1624
|
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
|
|
1707
1625
|
#[magnus::wrap(class = "Kreuzcrawl::LinkInfo")]
|
|
1708
1626
|
#[serde(default)]
|
|
@@ -3295,39 +3213,6 @@ impl magnus::TryConvert for AssetCategory {
|
|
|
3295
3213
|
unsafe impl IntoValueFromNative for AssetCategory {}
|
|
3296
3214
|
unsafe impl TryConvertOwned for AssetCategory {}
|
|
3297
3215
|
|
|
3298
|
-
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
|
|
3299
|
-
pub enum CrawlEvent {
|
|
3300
|
-
Page { _0: CrawlPageResult },
|
|
3301
|
-
Error { url: String, error: String },
|
|
3302
|
-
Complete { pages_crawled: usize },
|
|
3303
|
-
}
|
|
3304
|
-
|
|
3305
|
-
impl Default for CrawlEvent {
|
|
3306
|
-
fn default() -> Self {
|
|
3307
|
-
Self::Page { _0: Default::default() }
|
|
3308
|
-
}
|
|
3309
|
-
}
|
|
3310
|
-
|
|
3311
|
-
impl magnus::IntoValue for CrawlEvent {
|
|
3312
|
-
fn into_value_with(self, handle: &Ruby) -> magnus::Value {
|
|
3313
|
-
match serde_json::to_value(&self) {
|
|
3314
|
-
Ok(v) => json_to_ruby(handle, v),
|
|
3315
|
-
Err(_) => handle.qnil().into_value_with(handle),
|
|
3316
|
-
}
|
|
3317
|
-
}
|
|
3318
|
-
}
|
|
3319
|
-
|
|
3320
|
-
impl magnus::TryConvert for CrawlEvent {
|
|
3321
|
-
fn try_convert(val: magnus::Value) -> Result<Self, magnus::Error> {
|
|
3322
|
-
let s: String = magnus::TryConvert::try_convert(val)?;
|
|
3323
|
-
serde_json::from_str(&s)
|
|
3324
|
-
.map_err(|e| magnus::Error::new(unsafe { Ruby::get_unchecked() }.exception_type_error(), e.to_string()))
|
|
3325
|
-
}
|
|
3326
|
-
}
|
|
3327
|
-
|
|
3328
|
-
unsafe impl IntoValueFromNative for CrawlEvent {}
|
|
3329
|
-
unsafe impl TryConvertOwned for CrawlEvent {}
|
|
3330
|
-
|
|
3331
3216
|
fn create_engine(config: Option<String>) -> Result<CrawlEngineHandle, Error> {
|
|
3332
3217
|
let config: Option<CrawlConfig> = config
|
|
3333
3218
|
.as_deref()
|
|
@@ -3546,6 +3431,44 @@ impl From<kreuzcrawl::ProxyConfig> for ProxyConfig {
|
|
|
3546
3431
|
}
|
|
3547
3432
|
}
|
|
3548
3433
|
|
|
3434
|
+
impl From<ContentConfig> for kreuzcrawl::ContentConfig {
|
|
3435
|
+
fn from(val: ContentConfig) -> Self {
|
|
3436
|
+
Self {
|
|
3437
|
+
output_format: val.output_format,
|
|
3438
|
+
preprocessing_preset: val.preprocessing_preset,
|
|
3439
|
+
remove_navigation: val.remove_navigation,
|
|
3440
|
+
remove_forms: val.remove_forms,
|
|
3441
|
+
strip_tags: val.strip_tags,
|
|
3442
|
+
preserve_tags: val.preserve_tags,
|
|
3443
|
+
exclude_selectors: val.exclude_selectors,
|
|
3444
|
+
skip_images: val.skip_images,
|
|
3445
|
+
max_depth: val.max_depth,
|
|
3446
|
+
wrap: val.wrap,
|
|
3447
|
+
wrap_width: val.wrap_width,
|
|
3448
|
+
include_document_structure: val.include_document_structure,
|
|
3449
|
+
}
|
|
3450
|
+
}
|
|
3451
|
+
}
|
|
3452
|
+
|
|
3453
|
+
impl From<kreuzcrawl::ContentConfig> for ContentConfig {
|
|
3454
|
+
fn from(val: kreuzcrawl::ContentConfig) -> Self {
|
|
3455
|
+
Self {
|
|
3456
|
+
output_format: val.output_format,
|
|
3457
|
+
preprocessing_preset: val.preprocessing_preset,
|
|
3458
|
+
remove_navigation: val.remove_navigation,
|
|
3459
|
+
remove_forms: val.remove_forms,
|
|
3460
|
+
strip_tags: val.strip_tags,
|
|
3461
|
+
preserve_tags: val.preserve_tags,
|
|
3462
|
+
exclude_selectors: val.exclude_selectors,
|
|
3463
|
+
skip_images: val.skip_images,
|
|
3464
|
+
max_depth: val.max_depth,
|
|
3465
|
+
wrap: val.wrap,
|
|
3466
|
+
wrap_width: val.wrap_width,
|
|
3467
|
+
include_document_structure: val.include_document_structure,
|
|
3468
|
+
}
|
|
3469
|
+
}
|
|
3470
|
+
}
|
|
3471
|
+
|
|
3549
3472
|
impl From<BrowserConfig> for kreuzcrawl::BrowserConfig {
|
|
3550
3473
|
fn from(val: BrowserConfig) -> Self {
|
|
3551
3474
|
Self {
|
|
@@ -3587,14 +3510,15 @@ impl From<CrawlConfig> for kreuzcrawl::CrawlConfig {
|
|
|
3587
3510
|
exclude_paths: val.exclude_paths,
|
|
3588
3511
|
custom_headers: val.custom_headers.into_iter().collect(),
|
|
3589
3512
|
request_timeout: std::time::Duration::from_millis(val.request_timeout),
|
|
3513
|
+
rate_limit_ms: val.rate_limit_ms,
|
|
3590
3514
|
max_redirects: val.max_redirects,
|
|
3591
3515
|
retry_count: val.retry_count,
|
|
3592
3516
|
retry_codes: val.retry_codes,
|
|
3593
3517
|
cookies_enabled: val.cookies_enabled,
|
|
3594
3518
|
auth: val.auth.map(Into::into),
|
|
3595
3519
|
max_body_size: val.max_body_size,
|
|
3596
|
-
main_content_only: val.main_content_only,
|
|
3597
3520
|
remove_tags: val.remove_tags,
|
|
3521
|
+
content: val.content.into(),
|
|
3598
3522
|
map_limit: val.map_limit,
|
|
3599
3523
|
map_search: val.map_search,
|
|
3600
3524
|
download_assets: val.download_assets,
|
|
@@ -3629,14 +3553,15 @@ impl From<kreuzcrawl::CrawlConfig> for CrawlConfig {
|
|
|
3629
3553
|
exclude_paths: val.exclude_paths,
|
|
3630
3554
|
custom_headers: val.custom_headers.into_iter().collect(),
|
|
3631
3555
|
request_timeout: val.request_timeout.as_millis() as u64,
|
|
3556
|
+
rate_limit_ms: val.rate_limit_ms,
|
|
3632
3557
|
max_redirects: val.max_redirects,
|
|
3633
3558
|
retry_count: val.retry_count,
|
|
3634
3559
|
retry_codes: val.retry_codes,
|
|
3635
3560
|
cookies_enabled: val.cookies_enabled,
|
|
3636
3561
|
auth: val.auth.map(Into::into),
|
|
3637
3562
|
max_body_size: val.max_body_size,
|
|
3638
|
-
main_content_only: val.main_content_only,
|
|
3639
3563
|
remove_tags: val.remove_tags,
|
|
3564
|
+
content: val.content.into(),
|
|
3640
3565
|
map_limit: val.map_limit,
|
|
3641
3566
|
map_search: val.map_search,
|
|
3642
3567
|
download_assets: val.download_assets,
|
|
@@ -3677,40 +3602,17 @@ impl From<kreuzcrawl::DownloadedDocument> for DownloadedDocument {
|
|
|
3677
3602
|
mime_type: format!("{:?}", val.mime_type),
|
|
3678
3603
|
content: val.content.to_vec(),
|
|
3679
3604
|
size: val.size,
|
|
3680
|
-
filename: val.filename.as_ref().map(|v| format!("{:?}"
|
|
3605
|
+
filename: val.filename.as_ref().map(|v| format!("{v:?}")),
|
|
3681
3606
|
content_hash: format!("{:?}", val.content_hash),
|
|
3682
3607
|
headers: val
|
|
3683
3608
|
.headers
|
|
3684
3609
|
.into_iter()
|
|
3685
|
-
.map(|(k, v)| (
|
|
3610
|
+
.map(|(k, v)| (k.to_string(), v.to_string()))
|
|
3686
3611
|
.collect(),
|
|
3687
3612
|
}
|
|
3688
3613
|
}
|
|
3689
3614
|
}
|
|
3690
3615
|
|
|
3691
|
-
impl From<kreuzcrawl::InteractionResult> for InteractionResult {
|
|
3692
|
-
fn from(val: kreuzcrawl::InteractionResult) -> Self {
|
|
3693
|
-
Self {
|
|
3694
|
-
action_results: val.action_results.into_iter().map(Into::into).collect(),
|
|
3695
|
-
final_html: val.final_html,
|
|
3696
|
-
final_url: val.final_url,
|
|
3697
|
-
screenshot: val.screenshot.map(|v| v.to_vec()),
|
|
3698
|
-
}
|
|
3699
|
-
}
|
|
3700
|
-
}
|
|
3701
|
-
|
|
3702
|
-
impl From<kreuzcrawl::ActionResult> for ActionResult {
|
|
3703
|
-
fn from(val: kreuzcrawl::ActionResult) -> Self {
|
|
3704
|
-
Self {
|
|
3705
|
-
action_index: val.action_index,
|
|
3706
|
-
action_type: format!("{:?}", val.action_type),
|
|
3707
|
-
success: val.success,
|
|
3708
|
-
data: val.data.as_ref().map(ToString::to_string),
|
|
3709
|
-
error: val.error,
|
|
3710
|
-
}
|
|
3711
|
-
}
|
|
3712
|
-
}
|
|
3713
|
-
|
|
3714
3616
|
impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
3715
3617
|
fn from(val: ScrapeResult) -> Self {
|
|
3716
3618
|
Self {
|
|
@@ -3731,7 +3633,6 @@ impl From<ScrapeResult> for kreuzcrawl::ScrapeResult {
|
|
|
3731
3633
|
is_pdf: val.is_pdf,
|
|
3732
3634
|
was_skipped: val.was_skipped,
|
|
3733
3635
|
detected_charset: val.detected_charset,
|
|
3734
|
-
main_content_only: val.main_content_only,
|
|
3735
3636
|
auth_header_sent: val.auth_header_sent,
|
|
3736
3637
|
response_meta: val.response_meta.map(Into::into),
|
|
3737
3638
|
assets: val.assets.into_iter().map(Into::into).collect(),
|
|
@@ -3766,7 +3667,6 @@ impl From<kreuzcrawl::ScrapeResult> for ScrapeResult {
|
|
|
3766
3667
|
is_pdf: val.is_pdf,
|
|
3767
3668
|
was_skipped: val.was_skipped,
|
|
3768
3669
|
detected_charset: val.detected_charset,
|
|
3769
|
-
main_content_only: val.main_content_only,
|
|
3770
3670
|
auth_header_sent: val.auth_header_sent,
|
|
3771
3671
|
response_meta: val.response_meta.map(Into::into),
|
|
3772
3672
|
assets: val.assets.into_iter().map(Into::into).collect(),
|
|
@@ -3934,20 +3834,6 @@ impl From<kreuzcrawl::MarkdownResult> for MarkdownResult {
|
|
|
3934
3834
|
}
|
|
3935
3835
|
}
|
|
3936
3836
|
|
|
3937
|
-
impl From<kreuzcrawl::CachedPage> for CachedPage {
|
|
3938
|
-
fn from(val: kreuzcrawl::CachedPage) -> Self {
|
|
3939
|
-
Self {
|
|
3940
|
-
url: val.url,
|
|
3941
|
-
status_code: val.status_code,
|
|
3942
|
-
content_type: val.content_type,
|
|
3943
|
-
body: val.body,
|
|
3944
|
-
etag: val.etag,
|
|
3945
|
-
last_modified: val.last_modified,
|
|
3946
|
-
cached_at: val.cached_at,
|
|
3947
|
-
}
|
|
3948
|
-
}
|
|
3949
|
-
}
|
|
3950
|
-
|
|
3951
3837
|
impl From<LinkInfo> for kreuzcrawl::LinkInfo {
|
|
3952
3838
|
fn from(val: LinkInfo) -> Self {
|
|
3953
3839
|
Self {
|
|
@@ -4530,16 +4416,6 @@ impl From<kreuzcrawl::AssetCategory> for AssetCategory {
|
|
|
4530
4416
|
}
|
|
4531
4417
|
}
|
|
4532
4418
|
|
|
4533
|
-
impl From<kreuzcrawl::CrawlEvent> for CrawlEvent {
|
|
4534
|
-
fn from(val: kreuzcrawl::CrawlEvent) -> Self {
|
|
4535
|
-
match val {
|
|
4536
|
-
kreuzcrawl::CrawlEvent::Page(_0) => Self::Page { _0: (*_0).into() },
|
|
4537
|
-
kreuzcrawl::CrawlEvent::Error { url, error } => Self::Error { url, error },
|
|
4538
|
-
kreuzcrawl::CrawlEvent::Complete { pages_crawled } => Self::Complete { pages_crawled },
|
|
4539
|
-
}
|
|
4540
|
-
}
|
|
4541
|
-
}
|
|
4542
|
-
|
|
4543
4419
|
/// Convert a `kreuzcrawl::CrawlError` error to a Magnus runtime error.
|
|
4544
4420
|
#[allow(dead_code)]
|
|
4545
4421
|
fn crawl_error_to_magnus_err(e: kreuzcrawl::CrawlError) -> magnus::Error {
|
|
@@ -4565,6 +4441,24 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4565
4441
|
class.define_method("username", method!(ProxyConfig::username, 0))?;
|
|
4566
4442
|
class.define_method("password", method!(ProxyConfig::password, 0))?;
|
|
4567
4443
|
|
|
4444
|
+
let class = module.define_class("ContentConfig", ruby.class_object())?;
|
|
4445
|
+
class.define_singleton_method("new", function!(ContentConfig::new, 12))?;
|
|
4446
|
+
class.define_method("output_format", method!(ContentConfig::output_format, 0))?;
|
|
4447
|
+
class.define_method("preprocessing_preset", method!(ContentConfig::preprocessing_preset, 0))?;
|
|
4448
|
+
class.define_method("remove_navigation", method!(ContentConfig::remove_navigation, 0))?;
|
|
4449
|
+
class.define_method("remove_forms", method!(ContentConfig::remove_forms, 0))?;
|
|
4450
|
+
class.define_method("strip_tags", method!(ContentConfig::strip_tags, 0))?;
|
|
4451
|
+
class.define_method("preserve_tags", method!(ContentConfig::preserve_tags, 0))?;
|
|
4452
|
+
class.define_method("exclude_selectors", method!(ContentConfig::exclude_selectors, 0))?;
|
|
4453
|
+
class.define_method("skip_images", method!(ContentConfig::skip_images, 0))?;
|
|
4454
|
+
class.define_method("max_depth", method!(ContentConfig::max_depth, 0))?;
|
|
4455
|
+
class.define_method("wrap", method!(ContentConfig::wrap, 0))?;
|
|
4456
|
+
class.define_method("wrap_width", method!(ContentConfig::wrap_width, 0))?;
|
|
4457
|
+
class.define_method(
|
|
4458
|
+
"include_document_structure",
|
|
4459
|
+
method!(ContentConfig::include_document_structure, 0),
|
|
4460
|
+
)?;
|
|
4461
|
+
|
|
4568
4462
|
let class = module.define_class("BrowserConfig", ruby.class_object())?;
|
|
4569
4463
|
class.define_singleton_method("new", function!(BrowserConfig::new, 6))?;
|
|
4570
4464
|
class.define_method("mode", method!(BrowserConfig::mode, 0))?;
|
|
@@ -4587,14 +4481,15 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4587
4481
|
class.define_method("exclude_paths", method!(CrawlConfig::exclude_paths, 0))?;
|
|
4588
4482
|
class.define_method("custom_headers", method!(CrawlConfig::custom_headers, 0))?;
|
|
4589
4483
|
class.define_method("request_timeout", method!(CrawlConfig::request_timeout, 0))?;
|
|
4484
|
+
class.define_method("rate_limit_ms", method!(CrawlConfig::rate_limit_ms, 0))?;
|
|
4590
4485
|
class.define_method("max_redirects", method!(CrawlConfig::max_redirects, 0))?;
|
|
4591
4486
|
class.define_method("retry_count", method!(CrawlConfig::retry_count, 0))?;
|
|
4592
4487
|
class.define_method("retry_codes", method!(CrawlConfig::retry_codes, 0))?;
|
|
4593
4488
|
class.define_method("cookies_enabled", method!(CrawlConfig::cookies_enabled, 0))?;
|
|
4594
4489
|
class.define_method("auth", method!(CrawlConfig::auth, 0))?;
|
|
4595
4490
|
class.define_method("max_body_size", method!(CrawlConfig::max_body_size, 0))?;
|
|
4596
|
-
class.define_method("main_content_only", method!(CrawlConfig::main_content_only, 0))?;
|
|
4597
4491
|
class.define_method("remove_tags", method!(CrawlConfig::remove_tags, 0))?;
|
|
4492
|
+
class.define_method("content", method!(CrawlConfig::content, 0))?;
|
|
4598
4493
|
class.define_method("map_limit", method!(CrawlConfig::map_limit, 0))?;
|
|
4599
4494
|
class.define_method("map_search", method!(CrawlConfig::map_search, 0))?;
|
|
4600
4495
|
class.define_method("download_assets", method!(CrawlConfig::download_assets, 0))?;
|
|
@@ -4622,21 +4517,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4622
4517
|
class.define_method("content_hash", method!(DownloadedDocument::content_hash, 0))?;
|
|
4623
4518
|
class.define_method("headers", method!(DownloadedDocument::headers, 0))?;
|
|
4624
4519
|
|
|
4625
|
-
let class = module.define_class("InteractionResult", ruby.class_object())?;
|
|
4626
|
-
class.define_singleton_method("new", function!(InteractionResult::new, 4))?;
|
|
4627
|
-
class.define_method("action_results", method!(InteractionResult::action_results, 0))?;
|
|
4628
|
-
class.define_method("final_html", method!(InteractionResult::final_html, 0))?;
|
|
4629
|
-
class.define_method("final_url", method!(InteractionResult::final_url, 0))?;
|
|
4630
|
-
class.define_method("screenshot", method!(InteractionResult::screenshot, 0))?;
|
|
4631
|
-
|
|
4632
|
-
let class = module.define_class("ActionResult", ruby.class_object())?;
|
|
4633
|
-
class.define_singleton_method("new", function!(ActionResult::new, 5))?;
|
|
4634
|
-
class.define_method("action_index", method!(ActionResult::action_index, 0))?;
|
|
4635
|
-
class.define_method("action_type", method!(ActionResult::action_type, 0))?;
|
|
4636
|
-
class.define_method("success", method!(ActionResult::success, 0))?;
|
|
4637
|
-
class.define_method("data", method!(ActionResult::data, 0))?;
|
|
4638
|
-
class.define_method("error", method!(ActionResult::error, 0))?;
|
|
4639
|
-
|
|
4640
4520
|
let class = module.define_class("ScrapeResult", ruby.class_object())?;
|
|
4641
4521
|
class.define_singleton_method("new", function!(ScrapeResult::new, 1))?;
|
|
4642
4522
|
class.define_method("status_code", method!(ScrapeResult::status_code, 0))?;
|
|
@@ -4656,7 +4536,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4656
4536
|
class.define_method("is_pdf", method!(ScrapeResult::is_pdf, 0))?;
|
|
4657
4537
|
class.define_method("was_skipped", method!(ScrapeResult::was_skipped, 0))?;
|
|
4658
4538
|
class.define_method("detected_charset", method!(ScrapeResult::detected_charset, 0))?;
|
|
4659
|
-
class.define_method("main_content_only", method!(ScrapeResult::main_content_only, 0))?;
|
|
4660
4539
|
class.define_method("auth_header_sent", method!(ScrapeResult::auth_header_sent, 0))?;
|
|
4661
4540
|
class.define_method("response_meta", method!(ScrapeResult::response_meta, 0))?;
|
|
4662
4541
|
class.define_method("assets", method!(ScrapeResult::assets, 0))?;
|
|
@@ -4725,16 +4604,6 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
|
|
|
4725
4604
|
class.define_method("citations", method!(MarkdownResult::citations, 0))?;
|
|
4726
4605
|
class.define_method("fit_content", method!(MarkdownResult::fit_content, 0))?;
|
|
4727
4606
|
|
|
4728
|
-
let class = module.define_class("CachedPage", ruby.class_object())?;
|
|
4729
|
-
class.define_singleton_method("new", function!(CachedPage::new, 7))?;
|
|
4730
|
-
class.define_method("url", method!(CachedPage::url, 0))?;
|
|
4731
|
-
class.define_method("status_code", method!(CachedPage::status_code, 0))?;
|
|
4732
|
-
class.define_method("content_type", method!(CachedPage::content_type, 0))?;
|
|
4733
|
-
class.define_method("body", method!(CachedPage::body, 0))?;
|
|
4734
|
-
class.define_method("etag", method!(CachedPage::etag, 0))?;
|
|
4735
|
-
class.define_method("last_modified", method!(CachedPage::last_modified, 0))?;
|
|
4736
|
-
class.define_method("cached_at", method!(CachedPage::cached_at, 0))?;
|
|
4737
|
-
|
|
4738
4607
|
let class = module.define_class("LinkInfo", ruby.class_object())?;
|
|
4739
4608
|
class.define_singleton_method("new", function!(LinkInfo::new, 5))?;
|
|
4740
4609
|
class.define_method("url", method!(LinkInfo::url, 0))?;
|