@mmmbuto/masix 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/install.js +12 -49
- package/package.json +2 -2
- package/packages/plugin-base/discovery/0.2.4/manifest.json +7 -0
- package/packages/plugin-base/discovery/0.3.0/SHA256SUMS +2 -0
- package/packages/plugin-base/discovery/0.3.0/discovery-android-aarch64-termux.pkg +0 -0
- package/packages/plugin-base/discovery/0.3.0/discovery-linux-x86_64.pkg +0 -0
- package/packages/plugin-base/discovery/0.3.0/manifest.json +34 -0
- package/packages/plugin-base/discovery/CHANGELOG.md +7 -0
- package/packages/plugin-base/discovery/README.md +19 -10
- package/packages/plugin-base/discovery/source/Cargo.toml +3 -2
- package/packages/plugin-base/discovery/source/plugin.manifest.json +9 -4
- package/packages/plugin-base/discovery/source/src/doh.rs +103 -0
- package/packages/plugin-base/discovery/source/src/magnet_cache.rs +113 -0
- package/packages/plugin-base/discovery/source/src/main.rs +769 -49
- package/packages/plugin-base/discovery/source/src/torrent.rs +701 -0
- package/packages/plugin-base/discovery/source/src/transport.rs +112 -0
- package/prebuilt/masix +0 -0
|
@@ -1,3 +1,14 @@
|
|
|
1
|
+
mod doh;
|
|
2
|
+
mod magnet_cache;
|
|
3
|
+
mod torrent;
|
|
4
|
+
mod transport;
|
|
5
|
+
|
|
6
|
+
use crate::doh::DohResolver;
|
|
7
|
+
use crate::magnet_cache::MagnetCache;
|
|
8
|
+
use crate::torrent::{
|
|
9
|
+
bundled_providers, provider_statuses, search_mirror_catalog, ProviderStatus, TorrentProvider,
|
|
10
|
+
};
|
|
11
|
+
use crate::transport::TransportLayer;
|
|
1
12
|
use anyhow::{anyhow, Result};
|
|
2
13
|
use clap::{Parser, Subcommand};
|
|
3
14
|
use scraper::{Html, Selector};
|
|
@@ -5,6 +16,7 @@ use serde::{Deserialize, Serialize};
|
|
|
5
16
|
use std::cmp::Ordering;
|
|
6
17
|
use std::collections::{HashMap, HashSet};
|
|
7
18
|
use std::io::{self, BufRead, Write};
|
|
19
|
+
use std::path::PathBuf;
|
|
8
20
|
use std::sync::OnceLock;
|
|
9
21
|
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
|
10
22
|
use tokio::sync::Mutex;
|
|
@@ -13,13 +25,17 @@ const DEFAULT_SEARXNG_URLS: &[&str] = &[
|
|
|
13
25
|
"https://search.inetol.net",
|
|
14
26
|
"https://searx.work",
|
|
15
27
|
"https://search.privacyredirect.com",
|
|
28
|
+
"https://searx.be",
|
|
29
|
+
"https://northboot.xyz",
|
|
30
|
+
"https://searxng.site",
|
|
31
|
+
"https://etsi.me",
|
|
16
32
|
];
|
|
17
33
|
const MAX_WEB_CONTENT: usize = 15_000;
|
|
18
34
|
const SEARX_RETRIES: usize = 3;
|
|
19
35
|
const MAX_SEARX_PARALLEL_ENDPOINTS: usize = 3;
|
|
20
36
|
const MAX_ENDPOINTS_FROM_CONFIG: usize = 8;
|
|
21
37
|
const DEFAULT_SEARCH_TIMEOUT_SECS: u64 = 15;
|
|
22
|
-
const
|
|
38
|
+
const DEFAULT_TOR_TIMEOUT_SECS: u64 = 45;
|
|
23
39
|
const MAX_PROVIDER_RESULTS: usize = 40;
|
|
24
40
|
const MODULE_VERSION: &str = env!("CARGO_PKG_VERSION");
|
|
25
41
|
const BROWSER_USER_AGENT: &str =
|
|
@@ -50,6 +66,16 @@ enum Commands {
|
|
|
50
66
|
max_results: usize,
|
|
51
67
|
#[arg(long)]
|
|
52
68
|
endpoint: Option<String>,
|
|
69
|
+
#[arg(long, default_value_t = false)]
|
|
70
|
+
use_tor: bool,
|
|
71
|
+
#[arg(short, long)]
|
|
72
|
+
json: bool,
|
|
73
|
+
},
|
|
74
|
+
/// Search the web with anti-censorship transport preferences
|
|
75
|
+
TorSearch {
|
|
76
|
+
query: String,
|
|
77
|
+
#[arg(short, long, default_value_t = 5)]
|
|
78
|
+
max_results: usize,
|
|
53
79
|
#[arg(short, long)]
|
|
54
80
|
json: bool,
|
|
55
81
|
},
|
|
@@ -64,6 +90,10 @@ enum Commands {
|
|
|
64
90
|
endpoint: Option<String>,
|
|
65
91
|
#[arg(long, default_value_t = true)]
|
|
66
92
|
with_magnets: bool,
|
|
93
|
+
#[arg(long, value_delimiter = ',')]
|
|
94
|
+
providers: Vec<String>,
|
|
95
|
+
#[arg(long, default_value_t = false)]
|
|
96
|
+
use_tor: bool,
|
|
67
97
|
#[arg(short, long)]
|
|
68
98
|
json: bool,
|
|
69
99
|
},
|
|
@@ -75,6 +105,11 @@ enum Commands {
|
|
|
75
105
|
#[arg(short, long)]
|
|
76
106
|
json: bool,
|
|
77
107
|
},
|
|
108
|
+
/// Report current search transport and provider status
|
|
109
|
+
SearchStatus {
|
|
110
|
+
#[arg(short, long)]
|
|
111
|
+
json: bool,
|
|
112
|
+
},
|
|
78
113
|
/// Print plugin metadata (draft)
|
|
79
114
|
Manifest,
|
|
80
115
|
/// Run MCP server over stdio (JSON-RPC)
|
|
@@ -113,9 +148,11 @@ struct SearchResult {
|
|
|
113
148
|
score: Option<f64>,
|
|
114
149
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
115
150
|
endpoint: Option<String>,
|
|
151
|
+
#[serde(default, skip_serializing_if = "String::is_empty")]
|
|
152
|
+
via: String,
|
|
116
153
|
}
|
|
117
154
|
|
|
118
|
-
#[derive(Debug, Serialize)]
|
|
155
|
+
#[derive(Debug, Serialize, Clone)]
|
|
119
156
|
struct TorrentSearchResult {
|
|
120
157
|
title: String,
|
|
121
158
|
url: String,
|
|
@@ -123,6 +160,26 @@ struct TorrentSearchResult {
|
|
|
123
160
|
engine: String,
|
|
124
161
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
|
125
162
|
magnet_links: Vec<String>,
|
|
163
|
+
#[serde(default, skip_serializing_if = "String::is_empty")]
|
|
164
|
+
provider: String,
|
|
165
|
+
#[serde(default, skip_serializing_if = "String::is_empty")]
|
|
166
|
+
via: String,
|
|
167
|
+
#[serde(default, skip_serializing_if = "String::is_empty")]
|
|
168
|
+
source_url: String,
|
|
169
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
170
|
+
info_hash: Option<String>,
|
|
171
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
172
|
+
size: Option<String>,
|
|
173
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
174
|
+
size_bytes: Option<u64>,
|
|
175
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
176
|
+
seeds: Option<u32>,
|
|
177
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
178
|
+
leeches: Option<u32>,
|
|
179
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
180
|
+
uploader: Option<String>,
|
|
181
|
+
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
182
|
+
category: Option<String>,
|
|
126
183
|
}
|
|
127
184
|
|
|
128
185
|
#[derive(Debug, Deserialize)]
|
|
@@ -235,23 +292,93 @@ struct ArchiveDoc {
|
|
|
235
292
|
description: Option<serde_json::Value>,
|
|
236
293
|
}
|
|
237
294
|
|
|
295
|
+
/// Serializable status report for the Discovery plugin.
///
/// A value with these fields is obtained from `search_status(..)` and is either
/// pretty-printed as JSON or summarized on one line by the `SearchStatus` command;
/// the `search_status` MCP tool returns the same JSON.
#[derive(Debug, Serialize)]
struct SearchStatusSnapshot {
    /// Plugin version string shown as `Discovery <version>` in the text summary.
    plugin_version: String,
    /// Tor transport status (availability and port are shown in the text summary).
    tor: TorStatus,
    /// DNS-over-HTTPS resolver status.
    doh: DohStatus,
    /// Magnet cache statistics (`entries` is shown in the text summary).
    magnet_cache: MagnetCacheStatus,
    /// One entry per tracked SearXNG endpoint.
    searxng_endpoints: Vec<SearxEndpointStatus>,
    /// One entry per torrent provider; only the count is shown in the text summary.
    torrent_providers: Vec<ProviderStatus>,
}
|
|
304
|
+
|
|
305
|
+
/// Tor section of the search status report.
#[derive(Debug, Serialize)]
struct TorStatus {
    /// Printed as `tor=<bool>` in the status summary.
    available: bool,
    /// Printed as `port=<..>` in the status summary.
    /// NOTE(review): presumably the local Tor proxy port detected by the transport layer — confirm in transport.rs.
    port: Option<u16>,
    /// NOTE(review): free-form transport mode label; the set of values is not visible here — confirm in transport.rs.
    mode: String,
}
|
|
311
|
+
|
|
312
|
+
/// DNS-over-HTTPS section of the search status report.
#[derive(Debug, Serialize)]
struct DohStatus {
    /// NOTE(review): presumably the primary resolver endpoint — confirm against doh.rs.
    primary: String,
    /// NOTE(review): presumably the resolver used when the primary fails — confirm against doh.rs.
    fallback: String,
}
|
|
317
|
+
|
|
318
|
+
/// Magnet cache section of the search status report.
#[derive(Debug, Serialize)]
struct MagnetCacheStatus {
    /// Printed as `cache_entries=<n>` in the status summary.
    entries: usize,
    /// NOTE(review): presumably the cache entry lifetime in hours — confirm against magnet_cache.rs.
    ttl_hours: u64,
}
|
|
323
|
+
|
|
324
|
+
/// Per-endpoint SearXNG entry of the search status report.
#[derive(Debug, Serialize)]
struct SearxEndpointStatus {
    /// Endpoint base URL.
    url: String,
    /// NOTE(review): presumably the count of successful requests recorded for this endpoint — confirm where it is populated.
    successes: u32,
    /// NOTE(review): presumably the count of failed requests recorded for this endpoint — confirm where it is populated.
    failures: u32,
    /// NOTE(review): presumably a Unix timestamp in seconds (cf. `now_unix_secs`) — confirm where it is populated.
    cooldown_until: u64,
    /// Most recent error text, if any.
    last_error: Option<String>,
}
|
|
332
|
+
|
|
333
|
+
/// Shared runtime state for the Discovery plugin.
///
/// Built once in `main` via `DiscoveryState::new()` and passed by reference to the
/// command handlers and the MCP server loop.
#[derive(Debug)]
struct DiscoveryState {
    /// HTTP transport layer; hands out clients for page fetches and Ahmia queries.
    transport: TransportLayer,
    /// DNS-over-HTTPS resolver.
    doh: DohResolver,
    /// Magnet link cache.
    cache: MagnetCache,
    /// Torrent providers, initialized from `bundled_providers()`.
    torrent_providers: Vec<TorrentProvider>,
}
|
|
340
|
+
|
|
238
341
|
static ENDPOINT_HEALTH: OnceLock<Mutex<HashMap<String, EndpointHealth>>> = OnceLock::new();
|
|
239
342
|
|
|
240
343
|
fn endpoint_health_store() -> &'static Mutex<HashMap<String, EndpointHealth>> {
|
|
241
344
|
ENDPOINT_HEALTH.get_or_init(|| Mutex::new(HashMap::new()))
|
|
242
345
|
}
|
|
243
346
|
|
|
347
|
+
impl DiscoveryState {
|
|
348
|
+
async fn new() -> Result<Self> {
|
|
349
|
+
let user_agent = format!("MasiXDiscovery/{MODULE_VERSION} transport");
|
|
350
|
+
let transport = TransportLayer::new(
|
|
351
|
+
&user_agent,
|
|
352
|
+
DEFAULT_SEARCH_TIMEOUT_SECS,
|
|
353
|
+
DEFAULT_TOR_TIMEOUT_SECS,
|
|
354
|
+
)
|
|
355
|
+
.await?;
|
|
356
|
+
let doh = DohResolver::new(&user_agent)?;
|
|
357
|
+
let cache = MagnetCache::new(default_data_dir().as_deref()).await?;
|
|
358
|
+
Ok(Self {
|
|
359
|
+
transport,
|
|
360
|
+
doh,
|
|
361
|
+
cache,
|
|
362
|
+
torrent_providers: bundled_providers(),
|
|
363
|
+
})
|
|
364
|
+
}
|
|
365
|
+
}
|
|
366
|
+
|
|
244
367
|
#[tokio::main]
|
|
245
368
|
async fn main() -> Result<()> {
|
|
246
369
|
let cli = Cli::parse();
|
|
370
|
+
let state = DiscoveryState::new().await?;
|
|
247
371
|
match cli.command {
|
|
248
372
|
Commands::WebSearch {
|
|
249
373
|
query,
|
|
250
374
|
max_results,
|
|
251
375
|
endpoint,
|
|
376
|
+
use_tor,
|
|
252
377
|
json,
|
|
253
378
|
} => {
|
|
254
|
-
let results =
|
|
379
|
+
let results =
|
|
380
|
+
broker_web_search(&state, endpoint.as_deref(), &query, max_results, use_tor, None)
|
|
381
|
+
.await?;
|
|
255
382
|
if json {
|
|
256
383
|
println!("{}", serde_json::to_string_pretty(&results)?);
|
|
257
384
|
} else if results.is_empty() {
|
|
@@ -282,21 +409,67 @@ async fn main() -> Result<()> {
|
|
|
282
409
|
}
|
|
283
410
|
}
|
|
284
411
|
}
|
|
412
|
+
Commands::TorSearch {
|
|
413
|
+
query,
|
|
414
|
+
max_results,
|
|
415
|
+
json,
|
|
416
|
+
} => {
|
|
417
|
+
let results = broker_web_search(&state, None, &query, max_results, true, None).await?;
|
|
418
|
+
if json {
|
|
419
|
+
println!("{}", serde_json::to_string_pretty(&results)?);
|
|
420
|
+
} else if results.is_empty() {
|
|
421
|
+
println!("No results found.");
|
|
422
|
+
} else {
|
|
423
|
+
for (i, item) in results.iter().enumerate() {
|
|
424
|
+
println!(
|
|
425
|
+
"{}. {} [{} | {} | {}]\n {}\n {}\n",
|
|
426
|
+
i + 1,
|
|
427
|
+
if item.title.trim().is_empty() {
|
|
428
|
+
"(untitled)"
|
|
429
|
+
} else {
|
|
430
|
+
item.title.trim()
|
|
431
|
+
},
|
|
432
|
+
if item.engine.trim().is_empty() {
|
|
433
|
+
"unknown"
|
|
434
|
+
} else {
|
|
435
|
+
item.engine.trim()
|
|
436
|
+
},
|
|
437
|
+
if item.provider.trim().is_empty() {
|
|
438
|
+
"broker"
|
|
439
|
+
} else {
|
|
440
|
+
item.provider.trim()
|
|
441
|
+
},
|
|
442
|
+
if item.via.trim().is_empty() {
|
|
443
|
+
"clearnet"
|
|
444
|
+
} else {
|
|
445
|
+
item.via.trim()
|
|
446
|
+
},
|
|
447
|
+
item.url.trim(),
|
|
448
|
+
item.content.trim()
|
|
449
|
+
);
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
}
|
|
285
453
|
Commands::WebFetch { url } => {
|
|
286
|
-
println!("{}", web_fetch_page(&url).await?);
|
|
454
|
+
println!("{}", web_fetch_page(&url, &state.transport).await?);
|
|
287
455
|
}
|
|
288
456
|
Commands::TorrentSearch {
|
|
289
457
|
query,
|
|
290
458
|
max_results,
|
|
291
459
|
endpoint,
|
|
292
460
|
with_magnets,
|
|
461
|
+
providers,
|
|
462
|
+
use_tor,
|
|
293
463
|
json,
|
|
294
464
|
} => {
|
|
295
465
|
let results = torrent_search(
|
|
466
|
+
&state,
|
|
296
467
|
endpoint.as_deref(),
|
|
297
468
|
&query,
|
|
298
469
|
max_results.min(20).max(1),
|
|
299
470
|
with_magnets,
|
|
471
|
+
&providers,
|
|
472
|
+
use_tor,
|
|
300
473
|
)
|
|
301
474
|
.await?;
|
|
302
475
|
if json {
|
|
@@ -346,17 +519,32 @@ async fn main() -> Result<()> {
|
|
|
346
519
|
}
|
|
347
520
|
}
|
|
348
521
|
}
|
|
522
|
+
Commands::SearchStatus { json } => {
|
|
523
|
+
let status = search_status(&state).await;
|
|
524
|
+
if json {
|
|
525
|
+
println!("{}", serde_json::to_string_pretty(&status)?);
|
|
526
|
+
} else {
|
|
527
|
+
println!(
|
|
528
|
+
"Discovery {} | tor={} port={:?} | cache_entries={} | providers={}",
|
|
529
|
+
status.plugin_version,
|
|
530
|
+
status.tor.available,
|
|
531
|
+
status.tor.port,
|
|
532
|
+
status.magnet_cache.entries,
|
|
533
|
+
status.torrent_providers.len()
|
|
534
|
+
);
|
|
535
|
+
}
|
|
536
|
+
}
|
|
349
537
|
Commands::Manifest => {
|
|
350
538
|
println!("{}", include_str!("../plugin.manifest.json"));
|
|
351
539
|
}
|
|
352
540
|
Commands::ServeMcp => {
|
|
353
|
-
run_mcp_server().await?;
|
|
541
|
+
run_mcp_server(&state).await?;
|
|
354
542
|
}
|
|
355
543
|
}
|
|
356
544
|
Ok(())
|
|
357
545
|
}
|
|
358
546
|
|
|
359
|
-
async fn run_mcp_server() -> Result<()> {
|
|
547
|
+
async fn run_mcp_server(state: &DiscoveryState) -> Result<()> {
|
|
360
548
|
let stdin = io::stdin();
|
|
361
549
|
let mut stdout = io::stdout();
|
|
362
550
|
|
|
@@ -385,7 +573,7 @@ async fn run_mcp_server() -> Result<()> {
|
|
|
385
573
|
}
|
|
386
574
|
};
|
|
387
575
|
|
|
388
|
-
let response = handle_mcp_request(&request).await;
|
|
576
|
+
let response = handle_mcp_request(&request, state).await;
|
|
389
577
|
writeln!(stdout, "{}", serde_json::to_string(&response)?)?;
|
|
390
578
|
stdout.flush()?;
|
|
391
579
|
}
|
|
@@ -393,7 +581,7 @@ async fn run_mcp_server() -> Result<()> {
|
|
|
393
581
|
Ok(())
|
|
394
582
|
}
|
|
395
583
|
|
|
396
|
-
async fn handle_mcp_request(request: &JsonRpcRequest) -> JsonRpcResponse {
|
|
584
|
+
async fn handle_mcp_request(request: &JsonRpcRequest, state: &DiscoveryState) -> JsonRpcResponse {
|
|
397
585
|
match request.method.as_str() {
|
|
398
586
|
"initialize" => JsonRpcResponse {
|
|
399
587
|
jsonrpc: "2.0".to_string(),
|
|
@@ -434,7 +622,7 @@ async fn handle_mcp_request(request: &JsonRpcRequest) -> JsonRpcResponse {
|
|
|
434
622
|
.cloned()
|
|
435
623
|
.unwrap_or(serde_json::json!({}));
|
|
436
624
|
|
|
437
|
-
match handle_tool_call(tool_name, arguments).await {
|
|
625
|
+
match handle_tool_call(state, tool_name, arguments).await {
|
|
438
626
|
Ok(result) => JsonRpcResponse {
|
|
439
627
|
jsonrpc: "2.0".to_string(),
|
|
440
628
|
id: request.id.clone(),
|
|
@@ -488,6 +676,37 @@ fn get_tool_definitions() -> Vec<ToolDefinition> {
|
|
|
488
676
|
"type": "integer",
|
|
489
677
|
"description": "Maximum number of results (default: 5, max: 20)",
|
|
490
678
|
"default": 5
|
|
679
|
+
},
|
|
680
|
+
"use_tor": {
|
|
681
|
+
"type": "boolean",
|
|
682
|
+
"description": "Prefer Tor-capable routes when available; degrades to clearnet when Tor is unavailable",
|
|
683
|
+
"default": false
|
|
684
|
+
}
|
|
685
|
+
},
|
|
686
|
+
"required": ["query"]
|
|
687
|
+
}),
|
|
688
|
+
},
|
|
689
|
+
ToolDefinition {
|
|
690
|
+
name: "tor_search".to_string(),
|
|
691
|
+
description:
|
|
692
|
+
"Search using anti-censorship transport preferences. Falls back to clearnet if Tor is unavailable."
|
|
693
|
+
.to_string(),
|
|
694
|
+
input_schema: serde_json::json!({
|
|
695
|
+
"type": "object",
|
|
696
|
+
"properties": {
|
|
697
|
+
"query": {
|
|
698
|
+
"type": "string",
|
|
699
|
+
"description": "Search query"
|
|
700
|
+
},
|
|
701
|
+
"max_results": {
|
|
702
|
+
"type": "integer",
|
|
703
|
+
"description": "Maximum number of results (default: 5, max: 20)",
|
|
704
|
+
"default": 5
|
|
705
|
+
},
|
|
706
|
+
"engines": {
|
|
707
|
+
"type": "array",
|
|
708
|
+
"items": { "type": "string" },
|
|
709
|
+
"description": "Optional engine filter: ['ahmia', 'searxng', 'brave', 'duckduckgo', 'bing', 'wikipedia', 'news']. Omit to use all engines."
|
|
491
710
|
}
|
|
492
711
|
},
|
|
493
712
|
"required": ["query"]
|
|
@@ -528,6 +747,16 @@ fn get_tool_definitions() -> Vec<ToolDefinition> {
|
|
|
528
747
|
"type": "boolean",
|
|
529
748
|
"description": "Try to extract magnet links from each result URL (default: true)",
|
|
530
749
|
"default": true
|
|
750
|
+
},
|
|
751
|
+
"providers": {
|
|
752
|
+
"type": "array",
|
|
753
|
+
"items": { "type": "string" },
|
|
754
|
+
"description": "Optional provider filter: 1337x, thepiratebay, yts, torrentgalaxy, eztv, kickass, limetorrents, solidtorrents, bt4g, torrentz2, nyaa, searx, archive"
|
|
755
|
+
},
|
|
756
|
+
"use_tor": {
|
|
757
|
+
"type": "boolean",
|
|
758
|
+
"description": "Prefer Tor-capable routes when available; degrades to clearnet when Tor is unavailable",
|
|
759
|
+
"default": false
|
|
531
760
|
}
|
|
532
761
|
},
|
|
533
762
|
"required": ["query"]
|
|
@@ -552,10 +781,22 @@ fn get_tool_definitions() -> Vec<ToolDefinition> {
|
|
|
552
781
|
"required": ["url"]
|
|
553
782
|
}),
|
|
554
783
|
},
|
|
784
|
+
ToolDefinition {
|
|
785
|
+
name: "search_status".to_string(),
|
|
786
|
+
description: "Return Discovery transport, SearX health, and torrent provider capability status".to_string(),
|
|
787
|
+
input_schema: serde_json::json!({
|
|
788
|
+
"type": "object",
|
|
789
|
+
"properties": {}
|
|
790
|
+
}),
|
|
791
|
+
},
|
|
555
792
|
]
|
|
556
793
|
}
|
|
557
794
|
|
|
558
|
-
async fn handle_tool_call(
|
|
795
|
+
async fn handle_tool_call(
|
|
796
|
+
state: &DiscoveryState,
|
|
797
|
+
name: &str,
|
|
798
|
+
arguments: serde_json::Value,
|
|
799
|
+
) -> Result<ToolResult> {
|
|
559
800
|
match name {
|
|
560
801
|
"web_search" => {
|
|
561
802
|
let query = arguments
|
|
@@ -566,8 +807,46 @@ async fn handle_tool_call(name: &str, arguments: serde_json::Value) -> Result<To
|
|
|
566
807
|
.get("max_results")
|
|
567
808
|
.and_then(|v| v.as_u64())
|
|
568
809
|
.unwrap_or(5) as usize;
|
|
810
|
+
let use_tor = arguments
|
|
811
|
+
.get("use_tor")
|
|
812
|
+
.and_then(|v| v.as_bool())
|
|
813
|
+
.unwrap_or(false);
|
|
814
|
+
|
|
815
|
+
let results =
|
|
816
|
+
broker_web_search(state, None, query, max_results.min(20).max(1), use_tor, None).await?;
|
|
817
|
+
let text = serde_json::to_string_pretty(&results)?;
|
|
818
|
+
|
|
819
|
+
Ok(ToolResult {
|
|
820
|
+
content: vec![ToolContent {
|
|
821
|
+
content_type: "text".to_string(),
|
|
822
|
+
text,
|
|
823
|
+
}],
|
|
824
|
+
is_error: false,
|
|
825
|
+
})
|
|
826
|
+
}
|
|
827
|
+
"tor_search" => {
|
|
828
|
+
let query = arguments
|
|
829
|
+
.get("query")
|
|
830
|
+
.and_then(|v| v.as_str())
|
|
831
|
+
.ok_or_else(|| anyhow!("Missing 'query' parameter"))?;
|
|
832
|
+
let max_results = arguments
|
|
833
|
+
.get("max_results")
|
|
834
|
+
.and_then(|v| v.as_u64())
|
|
835
|
+
.unwrap_or(5) as usize;
|
|
836
|
+
let engines: Vec<String> = arguments
|
|
837
|
+
.get("engines")
|
|
838
|
+
.and_then(|v| v.as_array())
|
|
839
|
+
.map(|arr| {
|
|
840
|
+
arr.iter()
|
|
841
|
+
.filter_map(|e| e.as_str())
|
|
842
|
+
.map(|s| s.to_lowercase())
|
|
843
|
+
.collect()
|
|
844
|
+
})
|
|
845
|
+
.unwrap_or_default();
|
|
846
|
+
let engines_filter = if engines.is_empty() { None } else { Some(engines.as_slice()) };
|
|
569
847
|
|
|
570
|
-
let results =
|
|
848
|
+
let results =
|
|
849
|
+
broker_web_search(state, None, query, max_results.min(20).max(1), true, engines_filter).await?;
|
|
571
850
|
let text = serde_json::to_string_pretty(&results)?;
|
|
572
851
|
|
|
573
852
|
Ok(ToolResult {
|
|
@@ -584,7 +863,7 @@ async fn handle_tool_call(name: &str, arguments: serde_json::Value) -> Result<To
|
|
|
584
863
|
.and_then(|v| v.as_str())
|
|
585
864
|
.ok_or_else(|| anyhow!("Missing 'url' parameter"))?;
|
|
586
865
|
|
|
587
|
-
let content = web_fetch_page(url).await?;
|
|
866
|
+
let content = web_fetch_page(url, &state.transport).await?;
|
|
588
867
|
|
|
589
868
|
Ok(ToolResult {
|
|
590
869
|
content: vec![ToolContent {
|
|
@@ -607,9 +886,31 @@ async fn handle_tool_call(name: &str, arguments: serde_json::Value) -> Result<To
|
|
|
607
886
|
.get("with_magnets")
|
|
608
887
|
.and_then(|v| v.as_bool())
|
|
609
888
|
.unwrap_or(true);
|
|
889
|
+
let providers = arguments
|
|
890
|
+
.get("providers")
|
|
891
|
+
.and_then(|v| v.as_array())
|
|
892
|
+
.map(|items| {
|
|
893
|
+
items
|
|
894
|
+
.iter()
|
|
895
|
+
.filter_map(|item| item.as_str().map(|s| s.to_string()))
|
|
896
|
+
.collect::<Vec<_>>()
|
|
897
|
+
})
|
|
898
|
+
.unwrap_or_default();
|
|
899
|
+
let use_tor = arguments
|
|
900
|
+
.get("use_tor")
|
|
901
|
+
.and_then(|v| v.as_bool())
|
|
902
|
+
.unwrap_or(false);
|
|
610
903
|
|
|
611
|
-
let results =
|
|
612
|
-
|
|
904
|
+
let results = torrent_search(
|
|
905
|
+
state,
|
|
906
|
+
None,
|
|
907
|
+
query,
|
|
908
|
+
max_results.min(20).max(1),
|
|
909
|
+
with_magnets,
|
|
910
|
+
&providers,
|
|
911
|
+
use_tor,
|
|
912
|
+
)
|
|
913
|
+
.await?;
|
|
613
914
|
let text = serde_json::to_string_pretty(&results)?;
|
|
614
915
|
|
|
615
916
|
Ok(ToolResult {
|
|
@@ -641,18 +942,34 @@ async fn handle_tool_call(name: &str, arguments: serde_json::Value) -> Result<To
|
|
|
641
942
|
is_error: false,
|
|
642
943
|
})
|
|
643
944
|
}
|
|
945
|
+
"search_status" => {
|
|
946
|
+
let status = search_status(state).await;
|
|
947
|
+
let text = serde_json::to_string_pretty(&status)?;
|
|
948
|
+
|
|
949
|
+
Ok(ToolResult {
|
|
950
|
+
content: vec![ToolContent {
|
|
951
|
+
content_type: "text".to_string(),
|
|
952
|
+
text,
|
|
953
|
+
}],
|
|
954
|
+
is_error: false,
|
|
955
|
+
})
|
|
956
|
+
}
|
|
644
957
|
_ => Err(anyhow!("Unknown tool: {}", name)),
|
|
645
958
|
}
|
|
646
959
|
}
|
|
647
960
|
|
|
648
961
|
async fn broker_web_search(
|
|
962
|
+
state: &DiscoveryState,
|
|
649
963
|
endpoint_override: Option<&str>,
|
|
650
964
|
query: &str,
|
|
651
965
|
max_results: usize,
|
|
966
|
+
use_tor: bool,
|
|
967
|
+
engines_filter: Option<&[String]>,
|
|
652
968
|
) -> Result<Vec<SearchResult>> {
|
|
653
969
|
let max_results = max_results.min(20).max(1);
|
|
654
970
|
let endpoints = resolve_searx_endpoints(endpoint_override);
|
|
655
|
-
let mut reports =
|
|
971
|
+
let mut reports =
|
|
972
|
+
collect_provider_reports(state, &endpoints, query, max_results, use_tor, engines_filter).await;
|
|
656
973
|
let mut merged = Vec::new();
|
|
657
974
|
for report in &reports {
|
|
658
975
|
merged.extend(report.items.clone());
|
|
@@ -667,7 +984,8 @@ async fn broker_web_search(
|
|
|
667
984
|
if let Some(relaxed_query) = relax_search_query(query) {
|
|
668
985
|
if relaxed_query != query {
|
|
669
986
|
let retry_reports =
|
|
670
|
-
collect_provider_reports(&endpoints, &relaxed_query, max_results)
|
|
987
|
+
collect_provider_reports(state, &endpoints, &relaxed_query, max_results, use_tor, engines_filter)
|
|
988
|
+
.await;
|
|
671
989
|
let mut retry_merged = Vec::new();
|
|
672
990
|
for report in &retry_reports {
|
|
673
991
|
retry_merged.extend(report.items.clone());
|
|
@@ -684,7 +1002,8 @@ async fn broker_web_search(
|
|
|
684
1002
|
if let Some(topic_query) = topic_focus_query(query) {
|
|
685
1003
|
if topic_query != query {
|
|
686
1004
|
let topic_reports =
|
|
687
|
-
collect_provider_reports(&endpoints, &topic_query, max_results)
|
|
1005
|
+
collect_provider_reports(state, &endpoints, &topic_query, max_results, use_tor, engines_filter)
|
|
1006
|
+
.await;
|
|
688
1007
|
let mut topic_merged = Vec::new();
|
|
689
1008
|
for report in &topic_reports {
|
|
690
1009
|
topic_merged.extend(report.items.clone());
|
|
@@ -724,31 +1043,67 @@ async fn broker_web_search(
|
|
|
724
1043
|
}
|
|
725
1044
|
}
|
|
726
1045
|
|
|
1046
|
+
/// Decides whether the search engine `name` may run under an optional allow-list.
///
/// * `filter` — `None` or an empty slice means "no restriction": every engine is allowed.
/// * `name` — engine identifier to test (e.g. `"searxng"`, `"brave"`, `"ahmia"`).
///
/// Entries are compared ASCII case-insensitively. Whitespace around an entry is ignored,
/// so a caller-supplied value such as `" Brave "` still selects `brave` instead of
/// silently disabling every engine.
fn engine_allowed(filter: Option<&[String]>, name: &str) -> bool {
    match filter {
        None | Some([]) => true,
        Some(list) => list
            .iter()
            .any(|entry| entry.trim().eq_ignore_ascii_case(name)),
    }
}
|
|
1053
|
+
|
|
727
1054
|
async fn collect_provider_reports(
|
|
1055
|
+
state: &DiscoveryState,
|
|
728
1056
|
endpoints: &[String],
|
|
729
1057
|
query: &str,
|
|
730
1058
|
max_results: usize,
|
|
1059
|
+
use_tor: bool,
|
|
1060
|
+
engines_filter: Option<&[String]>,
|
|
731
1061
|
) -> Vec<SearchProviderReport> {
|
|
732
|
-
let
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
let wiki_future =
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
743
|
-
let
|
|
744
|
-
|
|
745
|
-
|
|
1062
|
+
let sub = (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS);
|
|
1063
|
+
let searx_future = async {
|
|
1064
|
+
if engine_allowed(engines_filter, "searxng") {
|
|
1065
|
+
searx_search_broker(endpoints, query, (max_results * 3).min(MAX_PROVIDER_RESULTS), MAX_SEARX_PARALLEL_ENDPOINTS).await
|
|
1066
|
+
} else { Ok(Vec::new()) }
|
|
1067
|
+
};
|
|
1068
|
+
let wiki_future = async {
|
|
1069
|
+
if engine_allowed(engines_filter, "wikipedia") {
|
|
1070
|
+
wikipedia_search(query, sub).await
|
|
1071
|
+
} else { Ok(Vec::new()) }
|
|
1072
|
+
};
|
|
1073
|
+
let news_future = async {
|
|
1074
|
+
if engine_allowed(engines_filter, "news") {
|
|
1075
|
+
google_news_search(query, sub).await
|
|
1076
|
+
} else { Ok(Vec::new()) }
|
|
1077
|
+
};
|
|
1078
|
+
let brave_future = async {
|
|
1079
|
+
if engine_allowed(engines_filter, "brave") {
|
|
1080
|
+
brave_html_search(query, sub).await
|
|
1081
|
+
} else { Ok(Vec::new()) }
|
|
1082
|
+
};
|
|
1083
|
+
let duckduckgo_future = async {
|
|
1084
|
+
if engine_allowed(engines_filter, "duckduckgo") {
|
|
1085
|
+
duckduckgo_html_search(query, sub).await
|
|
1086
|
+
} else { Ok(Vec::new()) }
|
|
1087
|
+
};
|
|
1088
|
+
let bing_future = async {
|
|
1089
|
+
if engine_allowed(engines_filter, "bing") {
|
|
1090
|
+
bing_rss_search(query, sub).await
|
|
1091
|
+
} else { Ok(Vec::new()) }
|
|
1092
|
+
};
|
|
1093
|
+
let ahmia_future = async {
|
|
1094
|
+
if engine_allowed(engines_filter, "ahmia") {
|
|
1095
|
+
ahmia_search(&state.transport, query, sub, use_tor).await
|
|
1096
|
+
} else { Ok(Vec::new()) }
|
|
1097
|
+
};
|
|
1098
|
+
|
|
1099
|
+
let (searx, wiki, news, brave, duckduckgo, bing, ahmia) = tokio::join!(
|
|
746
1100
|
searx_future,
|
|
747
1101
|
wiki_future,
|
|
748
1102
|
news_future,
|
|
749
1103
|
brave_future,
|
|
750
1104
|
duckduckgo_future,
|
|
751
|
-
bing_future
|
|
1105
|
+
bing_future,
|
|
1106
|
+
ahmia_future
|
|
752
1107
|
);
|
|
753
1108
|
|
|
754
1109
|
vec![
|
|
@@ -758,6 +1113,7 @@ async fn collect_provider_reports(
|
|
|
758
1113
|
as_provider_report("brave-html", brave),
|
|
759
1114
|
as_provider_report("duckduckgo", duckduckgo),
|
|
760
1115
|
as_provider_report("bing-rss", bing),
|
|
1116
|
+
as_provider_report("ahmia", ahmia),
|
|
761
1117
|
]
|
|
762
1118
|
}
|
|
763
1119
|
|
|
@@ -1214,6 +1570,7 @@ async fn web_search_single_endpoint(
|
|
|
1214
1570
|
source_domain: source_domain(&item.url),
|
|
1215
1571
|
score: None,
|
|
1216
1572
|
endpoint: Some(endpoint.to_string()),
|
|
1573
|
+
via: "searx".to_string(),
|
|
1217
1574
|
})
|
|
1218
1575
|
.collect::<Vec<_>>();
|
|
1219
1576
|
return Ok(mapped);
|
|
@@ -1324,6 +1681,7 @@ async fn wikipedia_search(query: &str, max_results: usize) -> Result<Vec<SearchR
|
|
|
1324
1681
|
source_domain: source_domain(&page_url),
|
|
1325
1682
|
score: None,
|
|
1326
1683
|
endpoint: None,
|
|
1684
|
+
via: "clearnet".to_string(),
|
|
1327
1685
|
}
|
|
1328
1686
|
})
|
|
1329
1687
|
.collect::<Vec<_>>();
|
|
@@ -1424,6 +1782,7 @@ async fn google_news_region_search(
|
|
|
1424
1782
|
source_domain: source_domain(&item.link),
|
|
1425
1783
|
score: None,
|
|
1426
1784
|
endpoint: None,
|
|
1785
|
+
via: "clearnet".to_string(),
|
|
1427
1786
|
})
|
|
1428
1787
|
.collect::<Vec<_>>())
|
|
1429
1788
|
}
|
|
@@ -1506,6 +1865,7 @@ async fn brave_html_search(query: &str, max_results: usize) -> Result<Vec<Search
|
|
|
1506
1865
|
source_domain: source_domain(href),
|
|
1507
1866
|
score: None,
|
|
1508
1867
|
endpoint: Some("https://search.brave.com/search".to_string()),
|
|
1868
|
+
via: "clearnet".to_string(),
|
|
1509
1869
|
});
|
|
1510
1870
|
}
|
|
1511
1871
|
|
|
@@ -1576,6 +1936,7 @@ async fn duckduckgo_html_search(query: &str, max_results: usize) -> Result<Vec<S
|
|
|
1576
1936
|
source_domain: source_domain(&url),
|
|
1577
1937
|
score: None,
|
|
1578
1938
|
endpoint: Some("https://lite.duckduckgo.com/lite".to_string()),
|
|
1939
|
+
via: "clearnet".to_string(),
|
|
1579
1940
|
});
|
|
1580
1941
|
}
|
|
1581
1942
|
if !results.is_empty() {
|
|
@@ -1647,6 +2008,7 @@ async fn duckduckgo_html_search(query: &str, max_results: usize) -> Result<Vec<S
|
|
|
1647
2008
|
source_domain: source_domain(&url),
|
|
1648
2009
|
score: None,
|
|
1649
2010
|
endpoint: Some("https://duckduckgo.com/html".to_string()),
|
|
2011
|
+
via: "clearnet".to_string(),
|
|
1650
2012
|
});
|
|
1651
2013
|
}
|
|
1652
2014
|
|
|
@@ -1673,6 +2035,7 @@ async fn duckduckgo_html_search(query: &str, max_results: usize) -> Result<Vec<S
|
|
|
1673
2035
|
source_domain: source_domain(&url),
|
|
1674
2036
|
score: None,
|
|
1675
2037
|
endpoint: Some("https://duckduckgo.com/html".to_string()),
|
|
2038
|
+
via: "clearnet".to_string(),
|
|
1676
2039
|
});
|
|
1677
2040
|
}
|
|
1678
2041
|
}
|
|
@@ -1751,10 +2114,74 @@ async fn bing_rss_search(query: &str, max_results: usize) -> Result<Vec<SearchRe
|
|
|
1751
2114
|
source_domain: source_domain(&item.link),
|
|
1752
2115
|
score: None,
|
|
1753
2116
|
endpoint: Some("https://www.bing.com/search?format=rss".to_string()),
|
|
2117
|
+
via: "clearnet".to_string(),
|
|
1754
2118
|
})
|
|
1755
2119
|
.collect::<Vec<_>>())
|
|
1756
2120
|
}
|
|
1757
2121
|
|
|
2122
|
+
async fn ahmia_search(
|
|
2123
|
+
transport: &TransportLayer,
|
|
2124
|
+
query: &str,
|
|
2125
|
+
max_results: usize,
|
|
2126
|
+
use_tor: bool,
|
|
2127
|
+
) -> Result<Vec<SearchResult>> {
|
|
2128
|
+
let (client, via, base_url) = transport.client_for_ahmia(use_tor);
|
|
2129
|
+
let response = client.get(base_url).query(&[("q", query)]).send().await?;
|
|
2130
|
+
if !response.status().is_success() {
|
|
2131
|
+
return Err(anyhow!("ahmia provider failed: HTTP {}", response.status()));
|
|
2132
|
+
}
|
|
2133
|
+
let html = response.text().await?;
|
|
2134
|
+
let document = Html::parse_document(&html);
|
|
2135
|
+
let result_selector = Selector::parse(".result, .search-result, li")
|
|
2136
|
+
.map_err(|e| anyhow!("Invalid Ahmia result selector: {}", e))?;
|
|
2137
|
+
let link_selector =
|
|
2138
|
+
Selector::parse("a").map_err(|e| anyhow!("Invalid Ahmia link selector: {}", e))?;
|
|
2139
|
+
|
|
2140
|
+
let mut output = Vec::new();
|
|
2141
|
+
for result in document.select(&result_selector) {
|
|
2142
|
+
if output.len() >= max_results.min(MAX_PROVIDER_RESULTS) {
|
|
2143
|
+
break;
|
|
2144
|
+
}
|
|
2145
|
+
for link in result.select(&link_selector) {
|
|
2146
|
+
let href = link.value().attr("href").unwrap_or("").trim();
|
|
2147
|
+
let title = link.text().collect::<String>().trim().to_string();
|
|
2148
|
+
if href.is_empty() || title.len() < 3 {
|
|
2149
|
+
continue;
|
|
2150
|
+
}
|
|
2151
|
+
let resolved = if href.starts_with("http://") || href.starts_with("https://") {
|
|
2152
|
+
href.to_string()
|
|
2153
|
+
} else {
|
|
2154
|
+
format!("{}{}", base_url.trim_end_matches('/'), href)
|
|
2155
|
+
};
|
|
2156
|
+
let content = result
|
|
2157
|
+
.text()
|
|
2158
|
+
.collect::<Vec<_>>()
|
|
2159
|
+
.join(" ")
|
|
2160
|
+
.split_whitespace()
|
|
2161
|
+
.collect::<Vec<_>>()
|
|
2162
|
+
.join(" ");
|
|
2163
|
+
output.push(SearchResult {
|
|
2164
|
+
title,
|
|
2165
|
+
url: resolved.clone(),
|
|
2166
|
+
content: truncate_text(&content, 400),
|
|
2167
|
+
engine: "ahmia".to_string(),
|
|
2168
|
+
provider: "ahmia".to_string(),
|
|
2169
|
+
source_domain: source_domain(&resolved),
|
|
2170
|
+
score: None,
|
|
2171
|
+
endpoint: Some(base_url.to_string()),
|
|
2172
|
+
via: via.to_string(),
|
|
2173
|
+
});
|
|
2174
|
+
break;
|
|
2175
|
+
}
|
|
2176
|
+
}
|
|
2177
|
+
|
|
2178
|
+
if output.is_empty() {
|
|
2179
|
+
Err(anyhow!("ahmia provider returned no results"))
|
|
2180
|
+
} else {
|
|
2181
|
+
Ok(output)
|
|
2182
|
+
}
|
|
2183
|
+
}
|
|
2184
|
+
|
|
1758
2185
|
async fn direct_domain_probe(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
|
|
1759
2186
|
let mut tokens = query
|
|
1760
2187
|
.split_whitespace()
|
|
@@ -1813,6 +2240,7 @@ async fn direct_domain_probe(query: &str, max_results: usize) -> Result<Vec<Sear
|
|
|
1813
2240
|
source_domain: source_domain(&url),
|
|
1814
2241
|
score: None,
|
|
1815
2242
|
endpoint: None,
|
|
2243
|
+
via: "clearnet".to_string(),
|
|
1816
2244
|
});
|
|
1817
2245
|
}
|
|
1818
2246
|
|
|
@@ -2107,11 +2535,8 @@ fn now_unix_secs() -> u64 {
|
|
|
2107
2535
|
.unwrap_or(0)
|
|
2108
2536
|
}
|
|
2109
2537
|
|
|
2110
|
-
async fn web_fetch_page(url: &str) -> Result<String> {
|
|
2111
|
-
let client =
|
|
2112
|
-
.user_agent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36")
|
|
2113
|
-
.timeout(Duration::from_secs(DEFAULT_FETCH_TIMEOUT_SECS))
|
|
2114
|
-
.build()?;
|
|
2538
|
+
async fn web_fetch_page(url: &str, transport: &TransportLayer) -> Result<String> {
|
|
2539
|
+
let (client, _via) = transport.client_for(url);
|
|
2115
2540
|
let response = client.get(url).send().await?;
|
|
2116
2541
|
if !response.status().is_success() {
|
|
2117
2542
|
return Err(anyhow!("fetch failed: HTTP {}", response.status()));
|
|
@@ -2165,10 +2590,13 @@ async fn web_fetch_page(url: &str) -> Result<String> {
|
|
|
2165
2590
|
}
|
|
2166
2591
|
|
|
2167
2592
|
async fn torrent_search(
|
|
2593
|
+
state: &DiscoveryState,
|
|
2168
2594
|
endpoint_override: Option<&str>,
|
|
2169
2595
|
query: &str,
|
|
2170
2596
|
max_results: usize,
|
|
2171
2597
|
with_magnets: bool,
|
|
2598
|
+
providers: &[String],
|
|
2599
|
+
use_tor: bool,
|
|
2172
2600
|
) -> Result<Vec<TorrentSearchResult>> {
|
|
2173
2601
|
let normalized_query = normalize_torrent_query(query);
|
|
2174
2602
|
let direct_query = query.trim();
|
|
@@ -2178,21 +2606,96 @@ async fn torrent_search(
|
|
|
2178
2606
|
direct_query
|
|
2179
2607
|
};
|
|
2180
2608
|
let endpoints = resolve_searx_endpoints(endpoint_override);
|
|
2609
|
+
let allowed = normalize_provider_filter(providers);
|
|
2610
|
+
let wants_all = allowed.is_empty();
|
|
2611
|
+
|
|
2612
|
+
let mirror_future = async {
|
|
2613
|
+
if wants_all || has_provider(&allowed, "mirror") {
|
|
2614
|
+
search_mirror_catalog(
|
|
2615
|
+
&state.torrent_providers,
|
|
2616
|
+
&state.transport,
|
|
2617
|
+
&state.doh,
|
|
2618
|
+
Some(&state.cache),
|
|
2619
|
+
direct_query,
|
|
2620
|
+
max_results * 2,
|
|
2621
|
+
if allowed.is_empty() {
|
|
2622
|
+
None
|
|
2623
|
+
} else {
|
|
2624
|
+
Some(&allowed)
|
|
2625
|
+
},
|
|
2626
|
+
use_tor,
|
|
2627
|
+
)
|
|
2628
|
+
.await
|
|
2629
|
+
} else {
|
|
2630
|
+
Ok(Vec::new())
|
|
2631
|
+
}
|
|
2632
|
+
};
|
|
2633
|
+
let searx_future = async {
|
|
2634
|
+
if wants_all || has_provider(&allowed, "searx") {
|
|
2635
|
+
searx_search_broker(
|
|
2636
|
+
&endpoints,
|
|
2637
|
+
&normalized_query,
|
|
2638
|
+
(max_results * 2).min(MAX_PROVIDER_RESULTS),
|
|
2639
|
+
MAX_SEARX_PARALLEL_ENDPOINTS,
|
|
2640
|
+
)
|
|
2641
|
+
.await
|
|
2642
|
+
} else {
|
|
2643
|
+
Ok(Vec::new())
|
|
2644
|
+
}
|
|
2645
|
+
};
|
|
2646
|
+
let nyaa_future = async {
|
|
2647
|
+
if wants_all || has_provider(&allowed, "nyaa") {
|
|
2648
|
+
nyaa_torrent_search(direct_query, max_results * 2).await
|
|
2649
|
+
} else {
|
|
2650
|
+
Ok(Vec::new())
|
|
2651
|
+
}
|
|
2652
|
+
};
|
|
2653
|
+
let archive_future = async {
|
|
2654
|
+
if wants_all || has_provider(&allowed, "archive") {
|
|
2655
|
+
archive_torrent_search(direct_query, max_results * 2).await
|
|
2656
|
+
} else {
|
|
2657
|
+
Ok(Vec::new())
|
|
2658
|
+
}
|
|
2659
|
+
};
|
|
2181
2660
|
|
|
2182
|
-
let
|
|
2183
|
-
|
|
2184
|
-
&normalized_query,
|
|
2185
|
-
(max_results * 2).min(MAX_PROVIDER_RESULTS),
|
|
2186
|
-
MAX_SEARX_PARALLEL_ENDPOINTS,
|
|
2187
|
-
);
|
|
2188
|
-
let nyaa_future = nyaa_torrent_search(direct_query, max_results * 2);
|
|
2189
|
-
let archive_future = archive_torrent_search(direct_query, max_results * 2);
|
|
2190
|
-
|
|
2191
|
-
let (searx, nyaa, archive) = tokio::join!(searx_future, nyaa_future, archive_future);
|
|
2661
|
+
let (mirror, searx, nyaa, archive) =
|
|
2662
|
+
tokio::join!(mirror_future, searx_future, nyaa_future, archive_future);
|
|
2192
2663
|
|
|
2193
2664
|
let mut output = Vec::new();
|
|
2194
2665
|
let mut provider_errors = Vec::new();
|
|
2195
2666
|
|
|
2667
|
+
match mirror {
|
|
2668
|
+
Ok(items) => {
|
|
2669
|
+
for item in items {
|
|
2670
|
+
output.push(TorrentSearchResult {
|
|
2671
|
+
title: item.title,
|
|
2672
|
+
url: item.url,
|
|
2673
|
+
content: item.content,
|
|
2674
|
+
engine: "mirror-catalog".to_string(),
|
|
2675
|
+
magnet_links: if with_magnets {
|
|
2676
|
+
item.magnet_links.clone()
|
|
2677
|
+
} else {
|
|
2678
|
+
Vec::new()
|
|
2679
|
+
},
|
|
2680
|
+
provider: item.provider,
|
|
2681
|
+
via: item.via,
|
|
2682
|
+
source_url: item.source_url,
|
|
2683
|
+
info_hash: item
|
|
2684
|
+
.magnet_links
|
|
2685
|
+
.iter()
|
|
2686
|
+
.find_map(|magnet| extract_info_hash(magnet)),
|
|
2687
|
+
size_bytes: item.size.as_deref().and_then(parse_size_bytes),
|
|
2688
|
+
size: item.size,
|
|
2689
|
+
seeds: item.seeds,
|
|
2690
|
+
leeches: item.leeches,
|
|
2691
|
+
uploader: item.uploader,
|
|
2692
|
+
category: item.category,
|
|
2693
|
+
});
|
|
2694
|
+
}
|
|
2695
|
+
}
|
|
2696
|
+
Err(e) => provider_errors.push(format!("mirror: {}", e)),
|
|
2697
|
+
}
|
|
2698
|
+
|
|
2196
2699
|
match nyaa {
|
|
2197
2700
|
Ok(mut items) => output.append(&mut items),
|
|
2198
2701
|
Err(e) => provider_errors.push(format!("nyaa: {}", e)),
|
|
@@ -2218,6 +2721,20 @@ async fn torrent_search(
|
|
|
2218
2721
|
result.engine
|
|
2219
2722
|
},
|
|
2220
2723
|
magnet_links,
|
|
2724
|
+
provider: "searx".to_string(),
|
|
2725
|
+
via: if result.via.trim().is_empty() {
|
|
2726
|
+
"searx".to_string()
|
|
2727
|
+
} else {
|
|
2728
|
+
result.via
|
|
2729
|
+
},
|
|
2730
|
+
source_url: result.endpoint.unwrap_or_else(|| "searx".to_string()),
|
|
2731
|
+
info_hash: None,
|
|
2732
|
+
size: None,
|
|
2733
|
+
size_bytes: None,
|
|
2734
|
+
seeds: None,
|
|
2735
|
+
leeches: None,
|
|
2736
|
+
uploader: None,
|
|
2737
|
+
category: None,
|
|
2221
2738
|
});
|
|
2222
2739
|
}
|
|
2223
2740
|
}
|
|
@@ -2254,7 +2771,15 @@ fn dedup_torrent_results(
|
|
|
2254
2771
|
let mut deduped = Vec::with_capacity(results.len());
|
|
2255
2772
|
|
|
2256
2773
|
for mut item in results.drain(..) {
|
|
2257
|
-
let key =
|
|
2774
|
+
let key = item
|
|
2775
|
+
.info_hash
|
|
2776
|
+
.clone()
|
|
2777
|
+
.or_else(|| {
|
|
2778
|
+
item.magnet_links
|
|
2779
|
+
.first()
|
|
2780
|
+
.map(|value| sanitize_magnet(value))
|
|
2781
|
+
})
|
|
2782
|
+
.unwrap_or_else(|| normalize_url_key(&item.url));
|
|
2258
2783
|
if key.is_empty() || !seen.insert(key) {
|
|
2259
2784
|
continue;
|
|
2260
2785
|
}
|
|
@@ -2276,7 +2801,9 @@ fn dedup_torrent_results(
|
|
|
2276
2801
|
|
|
2277
2802
|
fn torrent_engine_rank(engine: &str) -> u8 {
|
|
2278
2803
|
let lowered = engine.to_lowercase();
|
|
2279
|
-
if lowered.contains("
|
|
2804
|
+
if lowered.contains("mirror") {
|
|
2805
|
+
5
|
|
2806
|
+
} else if lowered.contains("nyaa") {
|
|
2280
2807
|
4
|
|
2281
2808
|
} else if lowered.contains("searx") {
|
|
2282
2809
|
3
|
|
@@ -2360,6 +2887,16 @@ async fn nyaa_torrent_search(query: &str, max_results: usize) -> Result<Vec<Torr
|
|
|
2360
2887
|
},
|
|
2361
2888
|
engine: "nyaa-html".to_string(),
|
|
2362
2889
|
magnet_links: magnets,
|
|
2890
|
+
provider: "nyaa".to_string(),
|
|
2891
|
+
via: "clearnet".to_string(),
|
|
2892
|
+
source_url: url.clone(),
|
|
2893
|
+
info_hash: None,
|
|
2894
|
+
size: None,
|
|
2895
|
+
size_bytes: None,
|
|
2896
|
+
seeds: None,
|
|
2897
|
+
leeches: None,
|
|
2898
|
+
uploader: None,
|
|
2899
|
+
category: None,
|
|
2363
2900
|
});
|
|
2364
2901
|
}
|
|
2365
2902
|
|
|
@@ -2419,6 +2956,16 @@ async fn archive_torrent_search(
|
|
|
2419
2956
|
},
|
|
2420
2957
|
engine: "archive-search".to_string(),
|
|
2421
2958
|
magnet_links: Vec::new(),
|
|
2959
|
+
provider: "archive".to_string(),
|
|
2960
|
+
via: "clearnet".to_string(),
|
|
2961
|
+
source_url: url.clone(),
|
|
2962
|
+
info_hash: None,
|
|
2963
|
+
size: None,
|
|
2964
|
+
size_bytes: None,
|
|
2965
|
+
seeds: None,
|
|
2966
|
+
leeches: None,
|
|
2967
|
+
uploader: None,
|
|
2968
|
+
category: None,
|
|
2422
2969
|
});
|
|
2423
2970
|
}
|
|
2424
2971
|
|
|
@@ -2445,6 +2992,102 @@ fn archive_description_to_string(value: Option<serde_json::Value>) -> String {
|
|
|
2445
2992
|
}
|
|
2446
2993
|
}
|
|
2447
2994
|
|
|
2995
|
+
/// Turns a user-supplied provider list into a lookup set.
///
/// Every entry is trimmed and lowercased; entries that are empty after trimming are
/// dropped. Duplicates collapse naturally because the result is a set.
fn normalize_provider_filter(providers: &[String]) -> HashSet<String> {
    let mut normalized = HashSet::new();
    for raw in providers {
        let name = raw.trim().to_lowercase();
        if name.is_empty() {
            continue;
        }
        normalized.insert(name);
    }
    normalized
}
|
|
3002
|
+
|
|
3003
|
+
/// Reports whether `provider` is present in the filter set.
///
/// The name is first looked up exactly as given; if that misses, its lowercased form is
/// tried as well.
fn has_provider(providers: &HashSet<String>, provider: &str) -> bool {
    if providers.contains(provider) {
        return true;
    }
    let lowered = provider.to_lowercase();
    providers.contains(lowered.as_str())
}
|
|
3006
|
+
|
|
3007
|
+
async fn search_status(state: &DiscoveryState) -> SearchStatusSnapshot {
|
|
3008
|
+
let endpoint_map = endpoint_health_store().lock().await.clone();
|
|
3009
|
+
let mut searxng_endpoints = resolve_searx_endpoints(None)
|
|
3010
|
+
.into_iter()
|
|
3011
|
+
.map(|endpoint| {
|
|
3012
|
+
let stats = endpoint_map.get(&endpoint).cloned().unwrap_or_default();
|
|
3013
|
+
SearxEndpointStatus {
|
|
3014
|
+
url: endpoint,
|
|
3015
|
+
successes: stats.successes,
|
|
3016
|
+
failures: stats.failures,
|
|
3017
|
+
cooldown_until: stats.cooldown_until,
|
|
3018
|
+
last_error: stats.last_error,
|
|
3019
|
+
}
|
|
3020
|
+
})
|
|
3021
|
+
.collect::<Vec<_>>();
|
|
3022
|
+
searxng_endpoints.sort_by(|a, b| a.url.cmp(&b.url));
|
|
3023
|
+
|
|
3024
|
+
SearchStatusSnapshot {
|
|
3025
|
+
plugin_version: MODULE_VERSION.to_string(),
|
|
3026
|
+
tor: TorStatus {
|
|
3027
|
+
available: state.transport.tor_available(),
|
|
3028
|
+
port: state.transport.tor_port(),
|
|
3029
|
+
mode: "auto-fallback".to_string(),
|
|
3030
|
+
},
|
|
3031
|
+
doh: DohStatus {
|
|
3032
|
+
primary: "cloudflare-dns.com".to_string(),
|
|
3033
|
+
fallback: "dns.google".to_string(),
|
|
3034
|
+
},
|
|
3035
|
+
magnet_cache: MagnetCacheStatus {
|
|
3036
|
+
entries: state.cache.size().await,
|
|
3037
|
+
ttl_hours: MagnetCache::ttl_hours(),
|
|
3038
|
+
},
|
|
3039
|
+
searxng_endpoints,
|
|
3040
|
+
torrent_providers: provider_statuses(&state.torrent_providers),
|
|
3041
|
+
}
|
|
3042
|
+
}
|
|
3043
|
+
|
|
3044
|
+
fn default_data_dir() -> Option<PathBuf> {
|
|
3045
|
+
if let Ok(value) = std::env::var("MASIX_DATA_DIR") {
|
|
3046
|
+
let trimmed = value.trim();
|
|
3047
|
+
if !trimmed.is_empty() {
|
|
3048
|
+
return Some(PathBuf::from(trimmed));
|
|
3049
|
+
}
|
|
3050
|
+
}
|
|
3051
|
+
dirs::home_dir().map(|home| home.join(".masix"))
|
|
3052
|
+
}
|
|
3053
|
+
|
|
3054
|
+
/// Parses a human-readable size such as `"1.5 GB"`, `"700MiB"` or `"512 B"` into bytes.
///
/// Digits and decimal points are gathered into the numeric part, and a comma is treated
/// as a decimal separator. Every other non-whitespace character becomes part of the unit,
/// which is matched case-insensitively. Decimal and binary unit spellings (`kb`/`kib`,
/// `mb`/`mib`, ...) both use powers of 1024. A missing unit, `b`, `byte` or `bytes` means
/// the number is already in bytes.
///
/// Returns `None` when the numeric part does not parse as a number or the unit is not
/// recognised. Fractional byte counts are truncated, and values too large for `u64`
/// saturate at `u64::MAX`.
fn parse_size_bytes(value: &str) -> Option<u64> {
    let mut number = String::new();
    let mut unit = String::new();
    for ch in value.trim().chars() {
        match ch {
            '0'..='9' | '.' => number.push(ch),
            // Accept "1,5 GB" style decimal commas.
            ',' => number.push('.'),
            c if c.is_whitespace() => {}
            c => unit.push(c),
        }
    }

    let base = number.parse::<f64>().ok()?;
    let multiplier = match unit.to_lowercase().as_str() {
        // Plain byte counts ("512 B", "10 bytes") need no scaling.
        "" | "b" | "byte" | "bytes" => 1_f64,
        "kb" | "kib" => 1024_f64,
        "mb" | "mib" => 1024_f64.powi(2),
        "gb" | "gib" => 1024_f64.powi(3),
        "tb" | "tib" => 1024_f64.powi(4),
        _ => return None,
    };
    // Float-to-integer `as` casts truncate toward zero and saturate instead of wrapping.
    Some((base * multiplier) as u64)
}
|
|
3076
|
+
|
|
3077
|
+
/// Extracts the BitTorrent info-hash from a magnet URI.
///
/// Finds the `xt=urn:btih:` parameter (matched case-insensitively) and returns the text
/// between it and the next `&`, or the end of the string, trimmed and in its original
/// case. Returns `None` when the parameter is absent or its value is empty.
fn extract_info_hash(magnet: &str) -> Option<String> {
    const NEEDLE: &str = "xt=urn:btih:";

    // ASCII-only lowercasing keeps every byte offset identical to `magnet`. Full Unicode
    // lowercasing can change the byte length of earlier characters (for instance in a
    // `dn=` display name), so an offset found in that copy could point at the wrong
    // place in the original string or fall inside a multi-byte character.
    let lower = magnet.to_ascii_lowercase();
    let start = lower.find(NEEDLE)? + NEEDLE.len();

    // `split` always yields at least one item, so the fallback is never taken in practice.
    let hash = magnet[start..].split('&').next().unwrap_or("").trim();
    if hash.is_empty() {
        None
    } else {
        Some(hash.to_string())
    }
}
|
|
3090
|
+
|
|
2448
3091
|
fn truncate_text(value: &str, max_chars: usize) -> String {
|
|
2449
3092
|
if value.chars().count() <= max_chars {
|
|
2450
3093
|
return value.to_string();
|
|
@@ -2568,3 +3211,80 @@ fn url_encode(value: &str) -> String {
|
|
|
2568
3211
|
}
|
|
2569
3212
|
out
|
|
2570
3213
|
}
|
|
3214
|
+
|
|
3215
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Provider names are trimmed and lowercased before they land in the filter set.
    #[test]
    fn normalize_provider_filter_is_case_insensitive() {
        let raw = [
            "Nyaa".to_string(),
            " SeArX ".to_string(),
            "mirror".to_string(),
        ];
        let items = normalize_provider_filter(&raw);
        for expected in &["nyaa", "searx", "mirror"] {
            assert!(items.contains(*expected));
        }
    }

    /// Sizes with and without a unit are converted; unparsable text yields `None`.
    #[test]
    fn parse_size_bytes_supports_common_units() {
        let cases: [(&str, Option<u64>); 4] = [
            ("1.5 GB", Some(1610612736)),
            ("700MB", Some(734003200)),
            ("42", Some(42)),
            ("unknown", None),
        ];
        for (input, expected) in cases.iter() {
            assert_eq!(parse_size_bytes(input), *expected);
        }
    }

    /// The hash is returned in its original case, without the trailing parameters.
    #[test]
    fn extract_info_hash_reads_btih() {
        let hash = extract_info_hash("magnet:?xt=urn:btih:ABCDEF1234567890&dn=test");
        assert_eq!(hash, Some("ABCDEF1234567890".to_string()));
    }

    /// Two results with different URLs but the same info-hash collapse into one entry.
    #[test]
    fn dedup_torrent_results_prefers_info_hash_key() {
        let first = TorrentSearchResult {
            title: "A".to_string(),
            url: "https://example.com/a".to_string(),
            content: String::new(),
            engine: "mirror-catalog".to_string(),
            magnet_links: vec!["magnet:?xt=urn:btih:HASH1".to_string()],
            provider: "mirror".to_string(),
            via: "clearnet".to_string(),
            source_url: "https://example.com/search".to_string(),
            info_hash: Some("HASH1".to_string()),
            size: None,
            size_bytes: None,
            seeds: Some(10),
            leeches: None,
            uploader: None,
            category: None,
        };
        let second = TorrentSearchResult {
            title: "B".to_string(),
            url: "https://example.com/b".to_string(),
            content: String::new(),
            engine: "mirror-catalog".to_string(),
            magnet_links: vec!["magnet:?xt=urn:btih:HASH1".to_string()],
            provider: "mirror".to_string(),
            via: "proxy".to_string(),
            source_url: "https://example.com/search".to_string(),
            info_hash: Some("HASH1".to_string()),
            size: None,
            size_bytes: None,
            seeds: Some(8),
            leeches: None,
            uploader: None,
            category: None,
        };

        let deduped = dedup_torrent_results(vec![first, second], true);
        assert_eq!(deduped.len(), 1);
    }
}
|