crawlberg 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/crawlberg_rb/native/Cargo.lock +66 -30
- data/ext/crawlberg_rb/native/Cargo.toml +5 -2
- data/ext/crawlberg_rb/src/lib.rs +7 -84
- data/lib/crawlberg/native.rb +81 -39
- data/lib/crawlberg/version.rb +2 -2
- data/lib/crawlberg.rb +10 -1
- data/lib/crawlberg_rb.so +0 -0
- data/sig/types.rbs +372 -372
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 81f77a1002fde8de55855f9ce9285aeebf14d04e550cfc3645f5f5c0ca5aaa78
|
|
4
|
+
data.tar.gz: dc26f9bd898faba3d5ff1a0676fbf3f5b5dcd27baa272b5f306a2b70a68084c3
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3334fa6988a8edc0bbc3521592d8e27e0443fd076f381ded58431e277cfc2e0633921e285255a0ef0974c31a19f981860aec02a5713e542c89629bbcb2dbfdce
|
|
7
|
+
data.tar.gz: 8a5e263e93fc4fee1c7f7066512519f6272a2d1b91c1df715ef8d640ad92872ee85ccf36faa6f1f4a64b51db954024f80568caabd3a86749755ca47ab5b43acd
|
|
@@ -145,9 +145,9 @@ checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
|
|
|
145
145
|
|
|
146
146
|
[[package]]
|
|
147
147
|
name = "aws-lc-rs"
|
|
148
|
-
version = "1.17.
|
|
148
|
+
version = "1.17.1"
|
|
149
149
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
150
|
-
checksum = "
|
|
150
|
+
checksum = "4342d8937fc7e5dd9b1c60292261c0670c882a2cd1719cfc11b1af41731e32ad"
|
|
151
151
|
dependencies = [
|
|
152
152
|
"aws-lc-sys",
|
|
153
153
|
"zeroize",
|
|
@@ -155,14 +155,15 @@ dependencies = [
|
|
|
155
155
|
|
|
156
156
|
[[package]]
|
|
157
157
|
name = "aws-lc-sys"
|
|
158
|
-
version = "0.
|
|
158
|
+
version = "0.42.0"
|
|
159
159
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
160
|
-
checksum = "
|
|
160
|
+
checksum = "6d9ceb1da931507a12f4fccea479dccd00da1943e1b4ae72d8e502d707361444"
|
|
161
161
|
dependencies = [
|
|
162
162
|
"cc",
|
|
163
163
|
"cmake",
|
|
164
164
|
"dunce",
|
|
165
165
|
"fs_extra",
|
|
166
|
+
"pkg-config",
|
|
166
167
|
]
|
|
167
168
|
|
|
168
169
|
[[package]]
|
|
@@ -544,9 +545,9 @@ dependencies = [
|
|
|
544
545
|
|
|
545
546
|
[[package]]
|
|
546
547
|
name = "crawlberg"
|
|
547
|
-
version = "1.0.
|
|
548
|
+
version = "1.0.2"
|
|
548
549
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
549
|
-
checksum = "
|
|
550
|
+
checksum = "e361dcd0b6433eaa4cc3e269f2e983e7f8ddea09b597bc5257d9a356e3d8882c"
|
|
550
551
|
dependencies = [
|
|
551
552
|
"ahash",
|
|
552
553
|
"aho-corasick",
|
|
@@ -586,7 +587,7 @@ dependencies = [
|
|
|
586
587
|
|
|
587
588
|
[[package]]
|
|
588
589
|
name = "crawlberg-rb"
|
|
589
|
-
version = "1.0.
|
|
590
|
+
version = "1.0.2"
|
|
590
591
|
dependencies = [
|
|
591
592
|
"crawlberg",
|
|
592
593
|
"futures",
|
|
@@ -1042,9 +1043,9 @@ dependencies = [
|
|
|
1042
1043
|
|
|
1043
1044
|
[[package]]
|
|
1044
1045
|
name = "html-to-markdown-rs"
|
|
1045
|
-
version = "3.
|
|
1046
|
+
version = "3.8.0"
|
|
1046
1047
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1047
|
-
checksum = "
|
|
1048
|
+
checksum = "da9935e81a6fee38ebbc5a2ca4e9590f99f650ffb8a08734c4115a3e06c3c5d3"
|
|
1048
1049
|
dependencies = [
|
|
1049
1050
|
"ahash",
|
|
1050
1051
|
"astral-tl",
|
|
@@ -1056,7 +1057,7 @@ dependencies = [
|
|
|
1056
1057
|
"lru",
|
|
1057
1058
|
"memchr",
|
|
1058
1059
|
"once_cell",
|
|
1059
|
-
"phf",
|
|
1060
|
+
"phf 0.14.0",
|
|
1060
1061
|
"regex",
|
|
1061
1062
|
"serde",
|
|
1062
1063
|
"serde_json",
|
|
@@ -1765,9 +1766,19 @@ name = "phf"
|
|
|
1765
1766
|
version = "0.13.1"
|
|
1766
1767
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1767
1768
|
checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf"
|
|
1769
|
+
dependencies = [
|
|
1770
|
+
"phf_shared 0.13.1",
|
|
1771
|
+
"serde",
|
|
1772
|
+
]
|
|
1773
|
+
|
|
1774
|
+
[[package]]
|
|
1775
|
+
name = "phf"
|
|
1776
|
+
version = "0.14.0"
|
|
1777
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1778
|
+
checksum = "010378780309880b08997fae13be7834dba947d36393bd372f2b1556deb2a2f6"
|
|
1768
1779
|
dependencies = [
|
|
1769
1780
|
"phf_macros",
|
|
1770
|
-
"phf_shared",
|
|
1781
|
+
"phf_shared 0.14.0",
|
|
1771
1782
|
"serde",
|
|
1772
1783
|
]
|
|
1773
1784
|
|
|
@@ -1777,8 +1788,8 @@ version = "0.13.1"
|
|
|
1777
1788
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1778
1789
|
checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1"
|
|
1779
1790
|
dependencies = [
|
|
1780
|
-
"phf_generator",
|
|
1781
|
-
"phf_shared",
|
|
1791
|
+
"phf_generator 0.13.1",
|
|
1792
|
+
"phf_shared 0.13.1",
|
|
1782
1793
|
]
|
|
1783
1794
|
|
|
1784
1795
|
[[package]]
|
|
@@ -1788,17 +1799,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
|
1788
1799
|
checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737"
|
|
1789
1800
|
dependencies = [
|
|
1790
1801
|
"fastrand",
|
|
1791
|
-
"phf_shared",
|
|
1802
|
+
"phf_shared 0.13.1",
|
|
1803
|
+
]
|
|
1804
|
+
|
|
1805
|
+
[[package]]
|
|
1806
|
+
name = "phf_generator"
|
|
1807
|
+
version = "0.14.0"
|
|
1808
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1809
|
+
checksum = "aeb62e0959d5a1bebc965f4d15d9e2b7cea002b6b0f5ba8cde6cc26738467100"
|
|
1810
|
+
dependencies = [
|
|
1811
|
+
"fastrand",
|
|
1812
|
+
"phf_shared 0.14.0",
|
|
1792
1813
|
]
|
|
1793
1814
|
|
|
1794
1815
|
[[package]]
|
|
1795
1816
|
name = "phf_macros"
|
|
1796
|
-
version = "0.
|
|
1817
|
+
version = "0.14.0"
|
|
1797
1818
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1798
|
-
checksum = "
|
|
1819
|
+
checksum = "5fa8d0ca26d424d27630da600c6624696e7dec8bf7b3b492b383c5dc49e5e085"
|
|
1799
1820
|
dependencies = [
|
|
1800
|
-
"phf_generator",
|
|
1801
|
-
"phf_shared",
|
|
1821
|
+
"phf_generator 0.14.0",
|
|
1822
|
+
"phf_shared 0.14.0",
|
|
1802
1823
|
"proc-macro2",
|
|
1803
1824
|
"quote",
|
|
1804
1825
|
"syn",
|
|
@@ -1813,12 +1834,27 @@ dependencies = [
|
|
|
1813
1834
|
"siphasher",
|
|
1814
1835
|
]
|
|
1815
1836
|
|
|
1837
|
+
[[package]]
|
|
1838
|
+
name = "phf_shared"
|
|
1839
|
+
version = "0.14.0"
|
|
1840
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1841
|
+
checksum = "c6fd9027e2d9319be6349febd1db4e8d02aa544921200c9b777720ac34a3aa89"
|
|
1842
|
+
dependencies = [
|
|
1843
|
+
"siphasher",
|
|
1844
|
+
]
|
|
1845
|
+
|
|
1816
1846
|
[[package]]
|
|
1817
1847
|
name = "pin-project-lite"
|
|
1818
1848
|
version = "0.2.17"
|
|
1819
1849
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1820
1850
|
checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
|
|
1821
1851
|
|
|
1852
|
+
[[package]]
|
|
1853
|
+
name = "pkg-config"
|
|
1854
|
+
version = "0.3.33"
|
|
1855
|
+
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1856
|
+
checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
|
|
1857
|
+
|
|
1822
1858
|
[[package]]
|
|
1823
1859
|
name = "png"
|
|
1824
1860
|
version = "0.18.1"
|
|
@@ -1901,9 +1937,9 @@ checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
|
|
|
1901
1937
|
|
|
1902
1938
|
[[package]]
|
|
1903
1939
|
name = "quick-xml"
|
|
1904
|
-
version = "0.
|
|
1940
|
+
version = "0.41.0"
|
|
1905
1941
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
1906
|
-
checksum = "
|
|
1942
|
+
checksum = "e660451e55124f798a69a5af3f49ccfbefbd41910eefd25caf2393e1f3473ec1"
|
|
1907
1943
|
dependencies = [
|
|
1908
1944
|
"memchr",
|
|
1909
1945
|
]
|
|
@@ -2190,9 +2226,9 @@ dependencies = [
|
|
|
2190
2226
|
|
|
2191
2227
|
[[package]]
|
|
2192
2228
|
name = "rustls-pki-types"
|
|
2193
|
-
version = "1.
|
|
2229
|
+
version = "1.15.0"
|
|
2194
2230
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2195
|
-
checksum = "
|
|
2231
|
+
checksum = "764899a24af3980067ee14bc143654f297b22eaebfe3c7b6b211920a5a59b046"
|
|
2196
2232
|
dependencies = [
|
|
2197
2233
|
"web-time",
|
|
2198
2234
|
"zeroize",
|
|
@@ -2468,7 +2504,7 @@ checksum = "a18596f8c785a729f2819c0f6a7eae6ebeebdfffbfe4214ae6b087f690e31901"
|
|
|
2468
2504
|
dependencies = [
|
|
2469
2505
|
"new_debug_unreachable",
|
|
2470
2506
|
"parking_lot",
|
|
2471
|
-
"phf_shared",
|
|
2507
|
+
"phf_shared 0.13.1",
|
|
2472
2508
|
"precomputed-hash",
|
|
2473
2509
|
]
|
|
2474
2510
|
|
|
@@ -2478,8 +2514,8 @@ version = "0.6.1"
|
|
|
2478
2514
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2479
2515
|
checksum = "585635e46db231059f76c5849798146164652513eb9e8ab2685939dd90f29b69"
|
|
2480
2516
|
dependencies = [
|
|
2481
|
-
"phf_generator",
|
|
2482
|
-
"phf_shared",
|
|
2517
|
+
"phf_generator 0.13.1",
|
|
2518
|
+
"phf_shared 0.13.1",
|
|
2483
2519
|
"proc-macro2",
|
|
2484
2520
|
"quote",
|
|
2485
2521
|
]
|
|
@@ -2574,9 +2610,9 @@ dependencies = [
|
|
|
2574
2610
|
|
|
2575
2611
|
[[package]]
|
|
2576
2612
|
name = "time"
|
|
2577
|
-
version = "0.3.
|
|
2613
|
+
version = "0.3.53"
|
|
2578
2614
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2579
|
-
checksum = "
|
|
2615
|
+
checksum = "18dfaaeddcb932337b5e7866ee7d0ce9b76d2fd092997146f187ec09b4558a50"
|
|
2580
2616
|
dependencies = [
|
|
2581
2617
|
"deranged",
|
|
2582
2618
|
"num-conv",
|
|
@@ -2594,9 +2630,9 @@ checksum = "9e1c906769ad99c88eaa54e728060edef082f8e358ff32030cb7c7d315e81109"
|
|
|
2594
2630
|
|
|
2595
2631
|
[[package]]
|
|
2596
2632
|
name = "time-macros"
|
|
2597
|
-
version = "0.2.
|
|
2633
|
+
version = "0.2.31"
|
|
2598
2634
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
2599
|
-
checksum = "
|
|
2635
|
+
checksum = "c431b87111666e491a90baa837f914fb45cd5dc3c268591b0220ff5057f2085f"
|
|
2600
2636
|
dependencies = [
|
|
2601
2637
|
"num-conv",
|
|
2602
2638
|
"time-core",
|
|
@@ -3011,7 +3047,7 @@ version = "0.2.5"
|
|
|
3011
3047
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
3012
3048
|
checksum = "075474b12bcb3d2e3d4546580e9de478eeeead668a1761e2a8860c836b7ef297"
|
|
3013
3049
|
dependencies = [
|
|
3014
|
-
"phf",
|
|
3050
|
+
"phf 0.13.1",
|
|
3015
3051
|
"phf_codegen",
|
|
3016
3052
|
"string_cache",
|
|
3017
3053
|
"string_cache_codegen",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "crawlberg-rb"
|
|
3
|
-
version = "1.0.
|
|
3
|
+
version = "1.0.2"
|
|
4
4
|
edition = "2024"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
description = "High-performance web crawling engine"
|
|
@@ -17,7 +17,10 @@ path = "../src/lib.rs"
|
|
|
17
17
|
crate-type = ["cdylib"]
|
|
18
18
|
|
|
19
19
|
[dependencies]
|
|
20
|
-
crawlberg = { version = "1.0.
|
|
20
|
+
crawlberg = { version = "1.0.2", features = [
|
|
21
|
+
"interact",
|
|
22
|
+
"browser-chromiumoxide",
|
|
23
|
+
] }
|
|
21
24
|
futures = "0.3"
|
|
22
25
|
magnus = "0.8"
|
|
23
26
|
rb-sys = ">=0.9, <0.9.128"
|
data/ext/crawlberg_rb/src/lib.rs
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
// This file is auto-generated by alef. DO NOT EDIT.
|
|
2
|
-
// alef:hash:
|
|
2
|
+
// alef:hash:23d662f17ccee663375ea978facec5b4b691adf30860c73224d58efb602c12d2
|
|
3
3
|
// Re-generate with: alef generate
|
|
4
4
|
#![allow(dead_code, unused_imports, unused_variables)]
|
|
5
5
|
#![allow(
|
|
@@ -438,21 +438,7 @@ unsafe impl TryConvertOwned for BrowserConfig {}
|
|
|
438
438
|
|
|
439
439
|
impl Default for BrowserConfig {
|
|
440
440
|
fn default() -> Self {
|
|
441
|
-
|
|
442
|
-
mode: BrowserMode::Auto,
|
|
443
|
-
backend: BrowserBackend::Chromiumoxide,
|
|
444
|
-
endpoint: None,
|
|
445
|
-
timeout: 30000,
|
|
446
|
-
wait: Default::default(),
|
|
447
|
-
wait_selector: None,
|
|
448
|
-
extra_wait: None,
|
|
449
|
-
proxy: None,
|
|
450
|
-
block_url_patterns: vec![],
|
|
451
|
-
eval_script: None,
|
|
452
|
-
robots_user_agent: None,
|
|
453
|
-
capture_network_events: false,
|
|
454
|
-
session_affinity: true,
|
|
455
|
-
}
|
|
441
|
+
crawlberg::BrowserConfig::default().into()
|
|
456
442
|
}
|
|
457
443
|
}
|
|
458
444
|
|
|
@@ -641,47 +627,7 @@ unsafe impl TryConvertOwned for CrawlConfig {}
|
|
|
641
627
|
|
|
642
628
|
impl Default for CrawlConfig {
|
|
643
629
|
fn default() -> Self {
|
|
644
|
-
|
|
645
|
-
max_depth: None,
|
|
646
|
-
max_pages: None,
|
|
647
|
-
max_concurrent: None,
|
|
648
|
-
respect_robots_txt: false,
|
|
649
|
-
soft_http_errors: false,
|
|
650
|
-
user_agent: None,
|
|
651
|
-
stay_on_domain: false,
|
|
652
|
-
allow_subdomains: false,
|
|
653
|
-
include_paths: vec![],
|
|
654
|
-
exclude_paths: vec![],
|
|
655
|
-
custom_headers: Default::default(),
|
|
656
|
-
request_timeout: 30000,
|
|
657
|
-
rate_limit_ms: None,
|
|
658
|
-
max_redirects: 10,
|
|
659
|
-
retry_count: 0,
|
|
660
|
-
retry_codes: vec![],
|
|
661
|
-
cookies_enabled: false,
|
|
662
|
-
auth: None,
|
|
663
|
-
max_body_size: None,
|
|
664
|
-
remove_tags: vec![],
|
|
665
|
-
content: Default::default(),
|
|
666
|
-
map_limit: None,
|
|
667
|
-
map_search: None,
|
|
668
|
-
download_assets: false,
|
|
669
|
-
asset_types: vec![],
|
|
670
|
-
max_asset_size: None,
|
|
671
|
-
browser: Default::default(),
|
|
672
|
-
proxy: None,
|
|
673
|
-
user_agents: vec![],
|
|
674
|
-
capture_screenshot: false,
|
|
675
|
-
follow_document_urls: false,
|
|
676
|
-
document_url_depth: None,
|
|
677
|
-
download_documents: true,
|
|
678
|
-
document_max_size: None,
|
|
679
|
-
document_mime_types: vec![],
|
|
680
|
-
warc_output: None,
|
|
681
|
-
browser_profile: None,
|
|
682
|
-
save_browser_profile: false,
|
|
683
|
-
ssrf: Default::default(),
|
|
684
|
-
}
|
|
630
|
+
crawlberg::CrawlConfig::default().into()
|
|
685
631
|
}
|
|
686
632
|
}
|
|
687
633
|
|
|
@@ -2410,13 +2356,7 @@ unsafe impl TryConvertOwned for LinkInfo {}
|
|
|
2410
2356
|
|
|
2411
2357
|
impl Default for LinkInfo {
|
|
2412
2358
|
fn default() -> Self {
|
|
2413
|
-
|
|
2414
|
-
url: String::new(),
|
|
2415
|
-
text: String::new(),
|
|
2416
|
-
link_type: Default::default(),
|
|
2417
|
-
rel: None,
|
|
2418
|
-
nofollow: false,
|
|
2419
|
-
}
|
|
2359
|
+
crawlberg::LinkInfo::default().into()
|
|
2420
2360
|
}
|
|
2421
2361
|
}
|
|
2422
2362
|
|
|
@@ -2511,13 +2451,7 @@ unsafe impl TryConvertOwned for ImageInfo {}
|
|
|
2511
2451
|
|
|
2512
2452
|
impl Default for ImageInfo {
|
|
2513
2453
|
fn default() -> Self {
|
|
2514
|
-
|
|
2515
|
-
url: String::new(),
|
|
2516
|
-
alt: None,
|
|
2517
|
-
width: None,
|
|
2518
|
-
height: None,
|
|
2519
|
-
source: Default::default(),
|
|
2520
|
-
}
|
|
2454
|
+
crawlberg::ImageInfo::default().into()
|
|
2521
2455
|
}
|
|
2522
2456
|
}
|
|
2523
2457
|
|
|
@@ -2608,11 +2542,7 @@ unsafe impl TryConvertOwned for FeedInfo {}
|
|
|
2608
2542
|
|
|
2609
2543
|
impl Default for FeedInfo {
|
|
2610
2544
|
fn default() -> Self {
|
|
2611
|
-
|
|
2612
|
-
url: String::new(),
|
|
2613
|
-
title: None,
|
|
2614
|
-
feed_type: Default::default(),
|
|
2615
|
-
}
|
|
2545
|
+
crawlberg::FeedInfo::default().into()
|
|
2616
2546
|
}
|
|
2617
2547
|
}
|
|
2618
2548
|
|
|
@@ -2854,14 +2784,7 @@ unsafe impl TryConvertOwned for DownloadedAsset {}
|
|
|
2854
2784
|
|
|
2855
2785
|
impl Default for DownloadedAsset {
|
|
2856
2786
|
fn default() -> Self {
|
|
2857
|
-
|
|
2858
|
-
url: String::new(),
|
|
2859
|
-
content_hash: String::new(),
|
|
2860
|
-
mime_type: None,
|
|
2861
|
-
size: 0,
|
|
2862
|
-
asset_category: Default::default(),
|
|
2863
|
-
html_tag: None,
|
|
2864
|
-
}
|
|
2787
|
+
crawlberg::DownloadedAsset::default().into()
|
|
2865
2788
|
}
|
|
2866
2789
|
}
|
|
2867
2790
|
|
data/lib/crawlberg/native.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:23d662f17ccee663375ea978facec5b4b691adf30860c73224d58efb602c12d2
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# frozen_string_literal: true
|
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
require "json"
|
|
8
8
|
require "sorbet-runtime"
|
|
9
9
|
require "crawlberg_rb"
|
|
10
|
+
|
|
10
11
|
module Crawlberg
|
|
11
12
|
# Authentication configuration.
|
|
12
13
|
module AuthConfig
|
|
@@ -22,10 +23,14 @@ module Crawlberg
|
|
|
22
23
|
def self.from_hash(hash)
|
|
23
24
|
discriminator = hash[:type] || hash["type"]
|
|
24
25
|
case discriminator
|
|
25
|
-
when "basic"
|
|
26
|
-
|
|
27
|
-
when "
|
|
28
|
-
|
|
26
|
+
when "basic"
|
|
27
|
+
AuthConfigBasic.from_hash(hash)
|
|
28
|
+
when "bearer"
|
|
29
|
+
AuthConfigBearer.from_hash(hash)
|
|
30
|
+
when "header"
|
|
31
|
+
AuthConfigHeader.from_hash(hash)
|
|
32
|
+
else
|
|
33
|
+
raise "Unknown discriminator: #{discriminator}"
|
|
29
34
|
end
|
|
30
35
|
end
|
|
31
36
|
end
|
|
@@ -36,10 +41,12 @@ module Crawlberg
|
|
|
36
41
|
|
|
37
42
|
# Username sent in the `Authorization: Basic` header.
|
|
38
43
|
sig { returns(String) }
|
|
39
|
-
|
|
44
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
45
|
+
def username = super
|
|
40
46
|
# Password sent in the `Authorization: Basic` header.
|
|
41
47
|
sig { returns(String) }
|
|
42
|
-
|
|
48
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
49
|
+
def password = super
|
|
43
50
|
sig { returns(T::Boolean) }
|
|
44
51
|
def basic? = true
|
|
45
52
|
sig { returns(T::Boolean) }
|
|
@@ -60,7 +67,8 @@ module Crawlberg
|
|
|
60
67
|
|
|
61
68
|
# Token sent in the `Authorization: Bearer` header.
|
|
62
69
|
sig { returns(String) }
|
|
63
|
-
|
|
70
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
71
|
+
def token = super
|
|
64
72
|
sig { returns(T::Boolean) }
|
|
65
73
|
def basic? = false
|
|
66
74
|
sig { returns(T::Boolean) }
|
|
@@ -81,10 +89,12 @@ module Crawlberg
|
|
|
81
89
|
|
|
82
90
|
# HTTP header name to set on each request.
|
|
83
91
|
sig { returns(String) }
|
|
84
|
-
|
|
92
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
93
|
+
def name = super
|
|
85
94
|
# HTTP header value to send.
|
|
86
95
|
sig { returns(String) }
|
|
87
|
-
|
|
96
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
97
|
+
def value = super
|
|
88
98
|
sig { returns(T::Boolean) }
|
|
89
99
|
def basic? = false
|
|
90
100
|
sig { returns(T::Boolean) }
|
|
@@ -120,10 +130,14 @@ module Crawlberg
|
|
|
120
130
|
def self.from_hash(hash)
|
|
121
131
|
discriminator = hash[:type] || hash["type"]
|
|
122
132
|
case discriminator
|
|
123
|
-
when "page"
|
|
124
|
-
|
|
125
|
-
when "
|
|
126
|
-
|
|
133
|
+
when "page"
|
|
134
|
+
CrawlEventPage.from_hash(hash)
|
|
135
|
+
when "error"
|
|
136
|
+
CrawlEventError.from_hash(hash)
|
|
137
|
+
when "complete"
|
|
138
|
+
CrawlEventComplete.from_hash(hash)
|
|
139
|
+
else
|
|
140
|
+
raise "Unknown discriminator: #{discriminator}"
|
|
127
141
|
end
|
|
128
142
|
end
|
|
129
143
|
end
|
|
@@ -134,7 +148,8 @@ module Crawlberg
|
|
|
134
148
|
|
|
135
149
|
# The crawled page result.
|
|
136
150
|
sig { returns(CrawlPageResult) }
|
|
137
|
-
|
|
151
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
152
|
+
def result = super
|
|
138
153
|
sig { returns(T::Boolean) }
|
|
139
154
|
def page? = true
|
|
140
155
|
sig { returns(T::Boolean) }
|
|
@@ -155,10 +170,12 @@ module Crawlberg
|
|
|
155
170
|
|
|
156
171
|
# The URL that failed.
|
|
157
172
|
sig { returns(String) }
|
|
158
|
-
|
|
173
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
174
|
+
def url = super
|
|
159
175
|
# The error message.
|
|
160
176
|
sig { returns(String) }
|
|
161
|
-
|
|
177
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
178
|
+
def error = super
|
|
162
179
|
sig { returns(T::Boolean) }
|
|
163
180
|
def page? = false
|
|
164
181
|
sig { returns(T::Boolean) }
|
|
@@ -179,7 +196,8 @@ module Crawlberg
|
|
|
179
196
|
|
|
180
197
|
# Total number of pages crawled.
|
|
181
198
|
sig { returns(Integer) }
|
|
182
|
-
|
|
199
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
200
|
+
def pages_crawled = super
|
|
183
201
|
sig { returns(T::Boolean) }
|
|
184
202
|
def page? = false
|
|
185
203
|
sig { returns(T::Boolean) }
|
|
@@ -213,15 +231,24 @@ module Crawlberg
|
|
|
213
231
|
def self.from_hash(hash)
|
|
214
232
|
discriminator = hash[:type] || hash["type"]
|
|
215
233
|
case discriminator
|
|
216
|
-
when "click"
|
|
217
|
-
|
|
218
|
-
when "
|
|
219
|
-
|
|
220
|
-
when "
|
|
221
|
-
|
|
222
|
-
when "
|
|
223
|
-
|
|
224
|
-
|
|
234
|
+
when "click"
|
|
235
|
+
PageActionClick.from_hash(hash)
|
|
236
|
+
when "type"
|
|
237
|
+
PageActionTypeText.from_hash(hash)
|
|
238
|
+
when "press"
|
|
239
|
+
PageActionPress.from_hash(hash)
|
|
240
|
+
when "scroll"
|
|
241
|
+
PageActionScroll.from_hash(hash)
|
|
242
|
+
when "wait"
|
|
243
|
+
PageActionWait.from_hash(hash)
|
|
244
|
+
when "screenshot"
|
|
245
|
+
PageActionScreenshot.from_hash(hash)
|
|
246
|
+
when "executeJs"
|
|
247
|
+
PageActionExecuteJs.from_hash(hash)
|
|
248
|
+
when "scrape"
|
|
249
|
+
PageActionScrape.from_hash(hash)
|
|
250
|
+
else
|
|
251
|
+
raise "Unknown discriminator: #{discriminator}"
|
|
225
252
|
end
|
|
226
253
|
end
|
|
227
254
|
end
|
|
@@ -232,7 +259,8 @@ module Crawlberg
|
|
|
232
259
|
|
|
233
260
|
# CSS selector for the element to click.
|
|
234
261
|
sig { returns(String) }
|
|
235
|
-
|
|
262
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
263
|
+
def selector = super
|
|
236
264
|
sig { returns(T::Boolean) }
|
|
237
265
|
def click? = true
|
|
238
266
|
sig { returns(T::Boolean) }
|
|
@@ -263,10 +291,12 @@ module Crawlberg
|
|
|
263
291
|
|
|
264
292
|
# CSS selector for the input element.
|
|
265
293
|
sig { returns(String) }
|
|
266
|
-
|
|
294
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
295
|
+
def selector = super
|
|
267
296
|
# Text to type into the element.
|
|
268
297
|
sig { returns(String) }
|
|
269
|
-
|
|
298
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
299
|
+
def text = super
|
|
270
300
|
sig { returns(T::Boolean) }
|
|
271
301
|
def click? = false
|
|
272
302
|
sig { returns(T::Boolean) }
|
|
@@ -297,7 +327,8 @@ module Crawlberg
|
|
|
297
327
|
|
|
298
328
|
# Key name to press.
|
|
299
329
|
sig { returns(String) }
|
|
300
|
-
|
|
330
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
331
|
+
def key = super
|
|
301
332
|
sig { returns(T::Boolean) }
|
|
302
333
|
def click? = false
|
|
303
334
|
sig { returns(T::Boolean) }
|
|
@@ -328,13 +359,16 @@ module Crawlberg
|
|
|
328
359
|
|
|
329
360
|
# Direction to scroll.
|
|
330
361
|
sig { returns(ScrollDirection) }
|
|
331
|
-
|
|
362
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
363
|
+
def direction = super
|
|
332
364
|
# Optional CSS selector for a scrollable element. Scrolls the page if absent.
|
|
333
365
|
sig { returns(T.nilable(String)) }
|
|
334
|
-
|
|
366
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
367
|
+
def selector = super
|
|
335
368
|
# Optional pixel amount to scroll. Uses a default if absent.
|
|
336
369
|
sig { returns(T.nilable(Integer)) }
|
|
337
|
-
|
|
370
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
371
|
+
def amount = super
|
|
338
372
|
sig { returns(T::Boolean) }
|
|
339
373
|
def click? = false
|
|
340
374
|
sig { returns(T::Boolean) }
|
|
@@ -355,7 +389,11 @@ module Crawlberg
|
|
|
355
389
|
# @return [self]
|
|
356
390
|
sig { params(hash: T::Hash[T.untyped, T.untyped]).returns(T.attached_class) }
|
|
357
391
|
def self.from_hash(hash)
|
|
358
|
-
new(
|
|
392
|
+
new(
|
|
393
|
+
direction: hash[:direction] || hash["direction"],
|
|
394
|
+
selector: hash[:selector] || hash["selector"],
|
|
395
|
+
amount: hash[:amount] || hash["amount"]
|
|
396
|
+
)
|
|
359
397
|
end
|
|
360
398
|
end
|
|
361
399
|
## Wait for a duration or for an element to appear.
|
|
@@ -365,10 +403,12 @@ module Crawlberg
|
|
|
365
403
|
|
|
366
404
|
# Milliseconds to wait. Ignored if `selector` is provided.
|
|
367
405
|
sig { returns(T.nilable(Integer)) }
|
|
368
|
-
|
|
406
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
407
|
+
def milliseconds = super
|
|
369
408
|
# CSS selector to wait for.
|
|
370
409
|
sig { returns(T.nilable(String)) }
|
|
371
|
-
|
|
410
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
411
|
+
def selector = super
|
|
372
412
|
sig { returns(T::Boolean) }
|
|
373
413
|
def click? = false
|
|
374
414
|
sig { returns(T::Boolean) }
|
|
@@ -403,7 +443,8 @@ module Crawlberg
|
|
|
403
443
|
# `full_page` (snake_case) alias so language bindings and fixtures can
|
|
404
444
|
# use either convention without error.
|
|
405
445
|
sig { returns(T.nilable(T::Boolean)) }
|
|
406
|
-
|
|
446
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
447
|
+
def full_page = super
|
|
407
448
|
sig { returns(T::Boolean) }
|
|
408
449
|
def click? = false
|
|
409
450
|
sig { returns(T::Boolean) }
|
|
@@ -439,7 +480,8 @@ module Crawlberg
|
|
|
439
480
|
|
|
440
481
|
# JavaScript source code to execute. Max 1 MB.
|
|
441
482
|
sig { returns(String) }
|
|
442
|
-
|
|
483
|
+
# rubocop:disable Lint/UselessMethodDefinition
|
|
484
|
+
def script = super
|
|
443
485
|
sig { returns(T::Boolean) }
|
|
444
486
|
def click? = false
|
|
445
487
|
sig { returns(T::Boolean) }
|
data/lib/crawlberg/version.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# This file is auto-generated by alef — DO NOT EDIT.
|
|
2
|
-
# alef:hash:
|
|
2
|
+
# alef:hash:23d662f17ccee663375ea978facec5b4b691adf30860c73224d58efb602c12d2
|
|
3
3
|
# To regenerate: alef generate
|
|
4
4
|
# To verify freshness: alef verify --exit-code
|
|
5
5
|
# frozen_string_literal: true
|
|
6
6
|
|
|
7
7
|
module Crawlberg
|
|
8
8
|
## The version string for this package.
|
|
9
|
-
VERSION = "1.0.
|
|
9
|
+
VERSION = "1.0.2"
|
|
10
10
|
end
|