@mmmbuto/masix 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/README.md +18 -14
  2. package/install.js +53 -27
  3. package/package.json +4 -3
  4. package/packages/plugin-base/codex-backend/0.1.4/SHA256SUMS +3 -0
  5. package/packages/plugin-base/codex-backend/0.1.4/codex-backend-android-aarch64-termux.pkg +0 -0
  6. package/packages/plugin-base/codex-backend/0.1.4/codex-backend-linux-x86_64.pkg +0 -0
  7. package/packages/plugin-base/codex-backend/0.1.4/codex-backend-macos-aarch64.pkg +0 -0
  8. package/packages/plugin-base/codex-backend/0.1.4/manifest.json +33 -0
  9. package/packages/plugin-base/codex-backend/CHANGELOG.md +17 -0
  10. package/packages/plugin-base/codex-backend/README.md +33 -0
  11. package/packages/plugin-base/codex-backend/source/Cargo.toml +25 -0
  12. package/packages/plugin-base/codex-backend/source/README-PACKAGE.txt +54 -0
  13. package/packages/plugin-base/codex-backend/source/plugin.manifest.json +103 -0
  14. package/packages/plugin-base/codex-backend/source/src/error.rs +60 -0
  15. package/packages/plugin-base/codex-backend/source/src/exec.rs +436 -0
  16. package/packages/plugin-base/codex-backend/source/src/http_backend.rs +1198 -0
  17. package/packages/plugin-base/codex-backend/source/src/lib.rs +328 -0
  18. package/packages/plugin-base/codex-backend/source/src/patch.rs +767 -0
  19. package/packages/plugin-base/codex-backend/source/src/policy.rs +297 -0
  20. package/packages/plugin-base/codex-backend/source/src/tools.rs +72 -0
  21. package/packages/plugin-base/codex-backend/source/src/workspace.rs +433 -0
  22. package/packages/plugin-base/codex-tools/0.1.3/SHA256SUMS +3 -0
  23. package/packages/plugin-base/codex-tools/0.1.3/codex-tools-android-aarch64-termux.pkg +0 -0
  24. package/packages/plugin-base/codex-tools/0.1.3/codex-tools-linux-x86_64.pkg +0 -0
  25. package/packages/plugin-base/codex-tools/0.1.3/codex-tools-macos-aarch64.pkg +0 -0
  26. package/packages/plugin-base/codex-tools/0.1.3/manifest.json +33 -0
  27. package/packages/plugin-base/codex-tools/CHANGELOG.md +17 -0
  28. package/packages/plugin-base/codex-tools/README.md +33 -0
  29. package/packages/plugin-base/codex-tools/source/Cargo.toml +23 -0
  30. package/packages/plugin-base/codex-tools/source/plugin.manifest.json +124 -0
  31. package/packages/plugin-base/codex-tools/source/src/main.rs +995 -0
  32. package/packages/plugin-base/discovery/0.2.4/SHA256SUMS +3 -0
  33. package/packages/plugin-base/discovery/0.2.4/discovery-android-aarch64-termux.pkg +0 -0
  34. package/packages/plugin-base/discovery/0.2.4/discovery-linux-x86_64.pkg +0 -0
  35. package/packages/plugin-base/discovery/0.2.4/discovery-macos-aarch64.pkg +0 -0
  36. package/packages/plugin-base/discovery/0.2.4/manifest.json +31 -0
  37. package/packages/plugin-base/discovery/CHANGELOG.md +17 -0
  38. package/packages/plugin-base/discovery/README.md +48 -0
  39. package/packages/plugin-base/discovery/source/Cargo.toml +14 -0
  40. package/packages/plugin-base/discovery/source/plugin.manifest.json +30 -0
  41. package/packages/plugin-base/discovery/source/src/main.rs +2570 -0
  42. package/prebuilt/masix +0 -0
@@ -0,0 +1,2570 @@
1
+ use anyhow::{anyhow, Result};
2
+ use clap::{Parser, Subcommand};
3
+ use scraper::{Html, Selector};
4
+ use serde::{Deserialize, Serialize};
5
+ use std::cmp::Ordering;
6
+ use std::collections::{HashMap, HashSet};
7
+ use std::io::{self, BufRead, Write};
8
+ use std::sync::OnceLock;
9
+ use std::time::{Duration, SystemTime, UNIX_EPOCH};
10
+ use tokio::sync::Mutex;
11
+
12
// Fallback public SearXNG instances used when no endpoint override or
// configuration is supplied.
const DEFAULT_SEARXNG_URLS: &[&str] = &[
    "https://search.inetol.net",
    "https://searx.work",
    "https://search.privacyredirect.com",
];
// Upper bound on extracted page text (consumed by the fetch path outside this chunk).
const MAX_WEB_CONTENT: usize = 15_000;
// Retry budget for SearXNG queries. NOTE(review): consumer not visible in this chunk.
const SEARX_RETRIES: usize = 3;
// How many SearXNG endpoints are queried concurrently in one search wave.
const MAX_SEARX_PARALLEL_ENDPOINTS: usize = 3;
// Cap on endpoints accepted from user configuration.
const MAX_ENDPOINTS_FROM_CONFIG: usize = 8;
// Default HTTP timeouts (seconds) for search and page-fetch requests.
const DEFAULT_SEARCH_TIMEOUT_SECS: u64 = 15;
const DEFAULT_FETCH_TIMEOUT_SECS: u64 = 20;
// Per-provider ceiling on raw results collected before ranking/dedup.
const MAX_PROVIDER_RESULTS: usize = 40;
const MODULE_VERSION: &str = env!("CARGO_PKG_VERSION");
// Desktop-browser UA for providers that reject obvious bot clients.
const BROWSER_USER_AGENT: &str =
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0 Safari/537.36";
// (country code, locale, "CC:lang") triples — presumably Google News region
// parameters; confirm against the news provider implementation.
const NEWS_REGIONS: &[(&str, &str, &str)] = &[
    ("US", "en-US", "US:en"),
    ("GB", "en-GB", "GB:en"),
    ("IT", "it-IT", "IT:it"),
    ("DE", "de-DE", "DE:de"),
    ("ES", "es-ES", "ES:es"),
    ("JP", "ja", "JP:ja"),
];
35
+
36
// Top-level CLI definition. Plain `//` comments are used here (not `///`)
// because clap turns doc comments into --help text and that output must
// stay unchanged.
#[derive(Parser)]
#[command(name = "masix-plugin-discovery")]
#[command(about = "External discovery module for MasiX (web search + fetch)")]
struct Cli {
    // The selected subcommand; all functionality hangs off `Commands`.
    #[command(subcommand)]
    command: Commands,
}
43
+
44
// CLI subcommands. The existing `///` lines double as clap help text and are
// kept byte-identical; new commentary uses `//` so --help output is unchanged.
#[derive(Subcommand)]
enum Commands {
    /// Search the web using autonomous multi-engine broker
    WebSearch {
        query: String,
        // Result cap; handlers clamp this to 1..=20.
        #[arg(short, long, default_value_t = 5)]
        max_results: usize,
        // Optional explicit SearXNG base URL overriding the default list.
        #[arg(long)]
        endpoint: Option<String>,
        // Emit machine-readable JSON instead of the human listing.
        #[arg(short, long)]
        json: bool,
    },
    /// Fetch and extract text content from a web page
    WebFetch { url: String },
    /// Search torrent metadata pages (lawful content only)
    TorrentSearch {
        query: String,
        #[arg(short, long, default_value_t = 5)]
        max_results: usize,
        #[arg(long)]
        endpoint: Option<String>,
        // Also fetch each result page and harvest magnet links (default on).
        #[arg(long, default_value_t = true)]
        with_magnets: bool,
        #[arg(short, long)]
        json: bool,
    },
    /// Extract magnet links from a web page URL
    TorrentExtract {
        url: String,
        #[arg(short, long, default_value_t = 5)]
        max_links: usize,
        #[arg(short, long)]
        json: bool,
    },
    /// Print plugin metadata (draft)
    Manifest,
    /// Run MCP server over stdio (JSON-RPC)
    ServeMcp,
}
83
+
84
+ #[derive(Debug, Deserialize)]
85
+ struct SearxResponse {
86
+ #[serde(default)]
87
+ results: Vec<SearxResultRaw>,
88
+ }
89
+
90
+ #[derive(Debug, Deserialize)]
91
+ struct SearxResultRaw {
92
+ #[serde(default)]
93
+ title: String,
94
+ #[serde(default)]
95
+ url: String,
96
+ #[serde(default, alias = "content")]
97
+ content: String,
98
+ #[serde(default)]
99
+ engine: String,
100
+ }
101
+
102
+ #[derive(Debug, Serialize, Clone)]
103
+ struct SearchResult {
104
+ title: String,
105
+ url: String,
106
+ content: String,
107
+ engine: String,
108
+ #[serde(default, skip_serializing_if = "String::is_empty")]
109
+ provider: String,
110
+ #[serde(default, skip_serializing_if = "String::is_empty")]
111
+ source_domain: String,
112
+ #[serde(default, skip_serializing_if = "Option::is_none")]
113
+ score: Option<f64>,
114
+ #[serde(default, skip_serializing_if = "Option::is_none")]
115
+ endpoint: Option<String>,
116
+ }
117
+
118
+ #[derive(Debug, Serialize)]
119
+ struct TorrentSearchResult {
120
+ title: String,
121
+ url: String,
122
+ content: String,
123
+ engine: String,
124
+ #[serde(default, skip_serializing_if = "Vec::is_empty")]
125
+ magnet_links: Vec<String>,
126
+ }
127
+
128
/// Incoming JSON-RPC 2.0 request, one per stdin line.
#[derive(Debug, Deserialize)]
struct JsonRpcRequest {
    // Required by the wire format but never inspected after parsing.
    #[allow(dead_code)]
    jsonrpc: String,
    // `None` marks a notification (a request that expects no response).
    id: Option<serde_json::Value>,
    method: String,
    // Method parameters; defaults to JSON `null` when absent.
    #[serde(default)]
    params: serde_json::Value,
}

/// Outgoing JSON-RPC 2.0 response; callers populate exactly one of
/// `result` / `error`, and the unused one is skipped during serialization.
#[derive(Debug, Serialize)]
struct JsonRpcResponse {
    jsonrpc: String,
    id: Option<serde_json::Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    result: Option<serde_json::Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    error: Option<JsonRpcError>,
}

/// JSON-RPC error object; this file uses the standard codes
/// -32700 (parse error) and -32601 (method not found).
#[derive(Debug, Serialize)]
struct JsonRpcError {
    code: i32,
    message: String,
}

/// MCP tool descriptor advertised in the `tools/list` response.
#[derive(Debug, Serialize)]
struct ToolDefinition {
    name: String,
    description: String,
    // JSON Schema describing the tool's accepted arguments.
    input_schema: serde_json::Value,
}
160
+
161
/// Payload of an MCP `tools/call` response.
#[derive(Debug, Serialize)]
struct ToolResult {
    content: Vec<ToolContent>,
    // Serialized only when true, keeping successful results compact.
    #[serde(skip_serializing_if = "is_false")]
    is_error: bool,
}
167
+
168
/// Serde `skip_serializing_if` helper: true when the flag is `false`,
/// so a `false` value is omitted from the output.
fn is_false(v: &bool) -> bool {
    !*v
}
171
+
172
/// One content item of a tool result; this server only emits plain text.
#[derive(Debug, Serialize)]
struct ToolContent {
    // Serialized under the key "type", as the MCP content schema requires.
    #[serde(rename = "type")]
    content_type: String,
    text: String,
}
178
+
179
/// Rolling health statistics for one SearXNG endpoint, used to rank and
/// temporarily sideline flaky instances.
#[derive(Debug, Clone, Default)]
struct EndpointHealth {
    successes: u32,
    failures: u32,
    // Unix timestamp (seconds) until which the endpoint should be avoided.
    // NOTE(review): units inferred from the SystemTime/UNIX_EPOCH imports —
    // confirm in mark_endpoint_failure (outside this chunk).
    cooldown_until: u64,
    last_error: Option<String>,
}

/// Outcome of querying one search provider: its hits plus an optional
/// error message when the provider failed.
#[derive(Debug, Clone)]
struct SearchProviderReport {
    provider: &'static str,
    items: Vec<SearchResult>,
    error: Option<String>,
}
193
+
194
// DTOs for the Wikipedia search API (shape matches the MediaWiki
// `action=query` / `list=search` response envelope).
#[derive(Debug, Deserialize)]
struct WikipediaResponse {
    #[serde(default)]
    query: Option<WikipediaQuery>,
}

#[derive(Debug, Deserialize)]
struct WikipediaQuery {
    #[serde(default)]
    search: Vec<WikipediaEntry>,
}

/// One Wikipedia search hit.
#[derive(Debug, Deserialize)]
struct WikipediaEntry {
    #[serde(default)]
    title: String,
    #[serde(default)]
    snippet: String,
    #[serde(default)]
    pageid: u64,
}

// DTOs for an archive.org-style search response (`response.docs[]`).
#[derive(Debug, Deserialize)]
struct ArchiveResponse {
    #[serde(default)]
    response: ArchiveInnerResponse,
}

#[derive(Debug, Deserialize, Default)]
struct ArchiveInnerResponse {
    #[serde(default)]
    docs: Vec<ArchiveDoc>,
}

#[derive(Debug, Deserialize, Default)]
struct ArchiveDoc {
    #[serde(default)]
    identifier: String,
    #[serde(default)]
    title: Option<String>,
    // Loosely typed on purpose — presumably the API returns either a string
    // or an array here; confirm against the consuming provider code.
    #[serde(default)]
    description: Option<serde_json::Value>,
}
237
+
238
// Process-wide ledger of per-endpoint health, lazily initialized on first use.
// Guarded by tokio's async-aware Mutex so it can be held across .await points.
static ENDPOINT_HEALTH: OnceLock<Mutex<HashMap<String, EndpointHealth>>> = OnceLock::new();

/// Returns the lazily-initialized global endpoint-health map.
fn endpoint_health_store() -> &'static Mutex<HashMap<String, EndpointHealth>> {
    ENDPOINT_HEALTH.get_or_init(|| Mutex::new(HashMap::new()))
}
243
+
244
/// Process entry point: parses the CLI and dispatches to the selected command.
/// Each branch prints either pretty JSON (--json) or a human-readable listing.
#[tokio::main]
async fn main() -> Result<()> {
    let cli = Cli::parse();
    match cli.command {
        Commands::WebSearch {
            query,
            max_results,
            endpoint,
            json,
        } => {
            // broker_web_search clamps max_results internally.
            let results = broker_web_search(endpoint.as_deref(), &query, max_results).await?;
            if json {
                println!("{}", serde_json::to_string_pretty(&results)?);
            } else if results.is_empty() {
                println!("No results found.");
            } else {
                // "N. title [engine | provider]" followed by url and snippet,
                // with placeholders for any blank fields.
                for (i, item) in results.iter().enumerate() {
                    println!(
                        "{}. {} [{} | {}]\n {}\n {}\n",
                        i + 1,
                        if item.title.trim().is_empty() {
                            "(untitled)"
                        } else {
                            item.title.trim()
                        },
                        if item.engine.trim().is_empty() {
                            "unknown"
                        } else {
                            item.engine.trim()
                        },
                        if item.provider.trim().is_empty() {
                            "broker"
                        } else {
                            item.provider.trim()
                        },
                        item.url.trim(),
                        item.content.trim()
                    );
                }
            }
        }
        Commands::WebFetch { url } => {
            println!("{}", web_fetch_page(&url).await?);
        }
        Commands::TorrentSearch {
            query,
            max_results,
            endpoint,
            with_magnets,
            json,
        } => {
            // Clamp requested count to 1..=20 before hitting providers.
            let results = torrent_search(
                endpoint.as_deref(),
                &query,
                max_results.min(20).max(1),
                with_magnets,
            )
            .await?;
            if json {
                println!("{}", serde_json::to_string_pretty(&results)?);
            } else if results.is_empty() {
                println!("No torrent results found.");
            } else {
                for (idx, item) in results.iter().enumerate() {
                    println!(
                        "{}. {} [{}]\n {}\n {}\n",
                        idx + 1,
                        if item.title.trim().is_empty() {
                            "(untitled)"
                        } else {
                            item.title.trim()
                        },
                        if item.engine.trim().is_empty() {
                            "unknown"
                        } else {
                            item.engine.trim()
                        },
                        item.url.trim(),
                        item.content.trim()
                    );
                    // Magnet URIs, when harvested, follow their result entry.
                    if !item.magnet_links.is_empty() {
                        for magnet in &item.magnet_links {
                            println!(" magnet: {}", magnet);
                        }
                        println!();
                    }
                }
            }
        }
        Commands::TorrentExtract {
            url,
            max_links,
            json,
        } => {
            let links = extract_magnet_links(&url, max_links.min(20).max(1)).await?;
            if json {
                println!("{}", serde_json::to_string_pretty(&links)?);
            } else if links.is_empty() {
                println!("No magnet links found.");
            } else {
                for (idx, link) in links.iter().enumerate() {
                    println!("{}. {}", idx + 1, link);
                }
            }
        }
        Commands::Manifest => {
            // The manifest is embedded at compile time from the crate root.
            println!("{}", include_str!("../plugin.manifest.json"));
        }
        Commands::ServeMcp => {
            run_mcp_server().await?;
        }
    }
    Ok(())
}
358
+
359
+ async fn run_mcp_server() -> Result<()> {
360
+ let stdin = io::stdin();
361
+ let mut stdout = io::stdout();
362
+
363
+ for line in stdin.lock().lines() {
364
+ let line = line?;
365
+ let line = line.trim();
366
+ if line.is_empty() {
367
+ continue;
368
+ }
369
+
370
+ let request: JsonRpcRequest = match serde_json::from_str(line) {
371
+ Ok(req) => req,
372
+ Err(e) => {
373
+ let response = JsonRpcResponse {
374
+ jsonrpc: "2.0".to_string(),
375
+ id: None,
376
+ result: None,
377
+ error: Some(JsonRpcError {
378
+ code: -32700,
379
+ message: format!("Parse error: {}", e),
380
+ }),
381
+ };
382
+ writeln!(stdout, "{}", serde_json::to_string(&response)?)?;
383
+ stdout.flush()?;
384
+ continue;
385
+ }
386
+ };
387
+
388
+ let response = handle_mcp_request(&request).await;
389
+ writeln!(stdout, "{}", serde_json::to_string(&response)?)?;
390
+ stdout.flush()?;
391
+ }
392
+
393
+ Ok(())
394
+ }
395
+
396
/// Dispatches one parsed JSON-RPC request to the matching MCP method and
/// builds the full response envelope.
async fn handle_mcp_request(request: &JsonRpcRequest) -> JsonRpcResponse {
    match request.method.as_str() {
        // MCP handshake: advertise protocol version and the tools capability.
        "initialize" => JsonRpcResponse {
            jsonrpc: "2.0".to_string(),
            id: request.id.clone(),
            result: Some(serde_json::json!({
                "protocolVersion": "2024-11-05",
                "capabilities": {
                    "tools": {}
                },
                "serverInfo": {
                    "name": "masix-discovery",
                    "version": env!("CARGO_PKG_VERSION")
                }
            })),
            error: None,
        },
        // Client acknowledgment notification; nothing to do. The returned
        // envelope is empty (id/result/error all None).
        "notifications/initialized" => JsonRpcResponse {
            jsonrpc: "2.0".to_string(),
            id: None,
            result: None,
            error: None,
        },
        "tools/list" => {
            let tools = get_tool_definitions();
            JsonRpcResponse {
                jsonrpc: "2.0".to_string(),
                id: request.id.clone(),
                result: Some(serde_json::json!({ "tools": tools })),
                error: None,
            }
        }
        "tools/call" => {
            let params = &request.params;
            // Missing name falls through to handle_tool_call's "Unknown tool" error.
            let tool_name = params.get("name").and_then(|v| v.as_str()).unwrap_or("");

            let arguments = params
                .get("arguments")
                .cloned()
                .unwrap_or(serde_json::json!({}));

            match handle_tool_call(tool_name, arguments).await {
                Ok(result) => JsonRpcResponse {
                    jsonrpc: "2.0".to_string(),
                    id: request.id.clone(),
                    result: Some(serde_json::to_value(result).unwrap_or(serde_json::json!({}))),
                    error: None,
                },
                // Tool failures are reported in-band as an is_error tool
                // result rather than as a JSON-RPC error object.
                Err(e) => JsonRpcResponse {
                    jsonrpc: "2.0".to_string(),
                    id: request.id.clone(),
                    result: Some(
                        serde_json::to_value(ToolResult {
                            content: vec![ToolContent {
                                content_type: "text".to_string(),
                                text: format!("Error: {}", e),
                            }],
                            is_error: true,
                        })
                        .unwrap_or(serde_json::json!({})),
                    ),
                    error: None,
                },
            }
        }
        // Anything else: standard JSON-RPC "method not found".
        _ => JsonRpcResponse {
            jsonrpc: "2.0".to_string(),
            id: request.id.clone(),
            result: None,
            error: Some(JsonRpcError {
                code: -32601,
                message: format!("Method not found: {}", request.method),
            }),
        },
    }
}
472
+
473
/// Static catalog of the four MCP tools this server exposes, with their
/// JSON Schema input contracts. Returned verbatim by `tools/list`.
fn get_tool_definitions() -> Vec<ToolDefinition> {
    vec![
        ToolDefinition {
            name: "web_search".to_string(),
            description:
                "Search the web using autonomous multi-engine broker (SearXNG + direct sources)."
                    .to_string(),
            input_schema: serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query"
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of results (default: 5, max: 20)",
                        "default": 5
                    }
                },
                "required": ["query"]
            }),
        },
        ToolDefinition {
            name: "web_fetch".to_string(),
            description: "Fetch and extract text content from a web page".to_string(),
            input_schema: serde_json::json!({
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "URL to fetch"
                    }
                },
                "required": ["url"]
            }),
        },
        ToolDefinition {
            name: "torrent_search".to_string(),
            description:
                "Search torrent metadata pages (lawful use only), with optional magnet extraction"
                    .to_string(),
            input_schema: serde_json::json!({
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "Search query for torrent metadata"
                    },
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of results (default: 5, max: 20)",
                        "default": 5
                    },
                    "with_magnets": {
                        "type": "boolean",
                        "description": "Try to extract magnet links from each result URL (default: true)",
                        "default": true
                    }
                },
                "required": ["query"]
            }),
        },
        ToolDefinition {
            name: "torrent_extract".to_string(),
            description: "Extract magnet links from a page URL".to_string(),
            input_schema: serde_json::json!({
                "type": "object",
                "properties": {
                    "url": {
                        "type": "string",
                        "description": "Page URL or magnet URL"
                    },
                    "max_links": {
                        "type": "integer",
                        "description": "Maximum links to return (default: 5, max: 20)",
                        "default": 5
                    }
                },
                "required": ["url"]
            }),
        },
    ]
}
557
+
558
+ async fn handle_tool_call(name: &str, arguments: serde_json::Value) -> Result<ToolResult> {
559
+ match name {
560
+ "web_search" => {
561
+ let query = arguments
562
+ .get("query")
563
+ .and_then(|v| v.as_str())
564
+ .ok_or_else(|| anyhow!("Missing 'query' parameter"))?;
565
+ let max_results = arguments
566
+ .get("max_results")
567
+ .and_then(|v| v.as_u64())
568
+ .unwrap_or(5) as usize;
569
+
570
+ let results = broker_web_search(None, query, max_results.min(20).max(1)).await?;
571
+ let text = serde_json::to_string_pretty(&results)?;
572
+
573
+ Ok(ToolResult {
574
+ content: vec![ToolContent {
575
+ content_type: "text".to_string(),
576
+ text,
577
+ }],
578
+ is_error: false,
579
+ })
580
+ }
581
+ "web_fetch" => {
582
+ let url = arguments
583
+ .get("url")
584
+ .and_then(|v| v.as_str())
585
+ .ok_or_else(|| anyhow!("Missing 'url' parameter"))?;
586
+
587
+ let content = web_fetch_page(url).await?;
588
+
589
+ Ok(ToolResult {
590
+ content: vec![ToolContent {
591
+ content_type: "text".to_string(),
592
+ text: content,
593
+ }],
594
+ is_error: false,
595
+ })
596
+ }
597
+ "torrent_search" => {
598
+ let query = arguments
599
+ .get("query")
600
+ .and_then(|v| v.as_str())
601
+ .ok_or_else(|| anyhow!("Missing 'query' parameter"))?;
602
+ let max_results = arguments
603
+ .get("max_results")
604
+ .and_then(|v| v.as_u64())
605
+ .unwrap_or(5) as usize;
606
+ let with_magnets = arguments
607
+ .get("with_magnets")
608
+ .and_then(|v| v.as_bool())
609
+ .unwrap_or(true);
610
+
611
+ let results =
612
+ torrent_search(None, query, max_results.min(20).max(1), with_magnets).await?;
613
+ let text = serde_json::to_string_pretty(&results)?;
614
+
615
+ Ok(ToolResult {
616
+ content: vec![ToolContent {
617
+ content_type: "text".to_string(),
618
+ text,
619
+ }],
620
+ is_error: false,
621
+ })
622
+ }
623
+ "torrent_extract" => {
624
+ let url = arguments
625
+ .get("url")
626
+ .and_then(|v| v.as_str())
627
+ .ok_or_else(|| anyhow!("Missing 'url' parameter"))?;
628
+ let max_links = arguments
629
+ .get("max_links")
630
+ .and_then(|v| v.as_u64())
631
+ .unwrap_or(5) as usize;
632
+
633
+ let links = extract_magnet_links(url, max_links.min(20).max(1)).await?;
634
+ let text = serde_json::to_string_pretty(&links)?;
635
+
636
+ Ok(ToolResult {
637
+ content: vec![ToolContent {
638
+ content_type: "text".to_string(),
639
+ text,
640
+ }],
641
+ is_error: false,
642
+ })
643
+ }
644
+ _ => Err(anyhow!("Unknown tool: {}", name)),
645
+ }
646
+ }
647
+
648
+ async fn broker_web_search(
649
+ endpoint_override: Option<&str>,
650
+ query: &str,
651
+ max_results: usize,
652
+ ) -> Result<Vec<SearchResult>> {
653
+ let max_results = max_results.min(20).max(1);
654
+ let endpoints = resolve_searx_endpoints(endpoint_override);
655
+ let mut reports = collect_provider_reports(&endpoints, query, max_results).await;
656
+ let mut merged = Vec::new();
657
+ for report in &reports {
658
+ merged.extend(report.items.clone());
659
+ }
660
+ let ranked = rank_and_dedup(merged, max_results, query);
661
+ if !ranked.is_empty() {
662
+ return Ok(ranked);
663
+ }
664
+
665
+ // Retry once with a simplified keyword-only query, useful when LLM sends
666
+ // long instruction-style prompts instead of search terms.
667
+ if let Some(relaxed_query) = relax_search_query(query) {
668
+ if relaxed_query != query {
669
+ let retry_reports =
670
+ collect_provider_reports(&endpoints, &relaxed_query, max_results).await;
671
+ let mut retry_merged = Vec::new();
672
+ for report in &retry_reports {
673
+ retry_merged.extend(report.items.clone());
674
+ }
675
+ let retry_ranked = rank_and_dedup(retry_merged, max_results, &relaxed_query);
676
+ if !retry_ranked.is_empty() {
677
+ return Ok(retry_ranked);
678
+ }
679
+ reports.extend(retry_reports);
680
+ }
681
+ }
682
+
683
+ // Second retry using topic-focused keywords for geopolitical/news prompts.
684
+ if let Some(topic_query) = topic_focus_query(query) {
685
+ if topic_query != query {
686
+ let topic_reports =
687
+ collect_provider_reports(&endpoints, &topic_query, max_results).await;
688
+ let mut topic_merged = Vec::new();
689
+ for report in &topic_reports {
690
+ topic_merged.extend(report.items.clone());
691
+ }
692
+ let topic_ranked = rank_and_dedup(topic_merged, max_results, &topic_query);
693
+ if !topic_ranked.is_empty() {
694
+ return Ok(topic_ranked);
695
+ }
696
+ reports.extend(topic_reports);
697
+ }
698
+ }
699
+
700
+ if let Ok(probe_results) = direct_domain_probe(query, max_results).await {
701
+ if !probe_results.is_empty() {
702
+ return Ok(probe_results);
703
+ }
704
+ }
705
+
706
+ let mut provider_errors = Vec::new();
707
+ for report in reports {
708
+ if let Some(err) = report.error {
709
+ provider_errors.push(format!("{}: {}", report.provider, err));
710
+ }
711
+ }
712
+
713
+ if provider_errors.is_empty() {
714
+ Err(anyhow!(
715
+ "multi-engine search returned no results for query '{}'; providers returned empty sets",
716
+ query
717
+ ))
718
+ } else {
719
+ Err(anyhow!(
720
+ "multi-engine search failed for query '{}': {}",
721
+ query,
722
+ provider_errors.join(" | ")
723
+ ))
724
+ }
725
+ }
726
+
727
+ async fn collect_provider_reports(
728
+ endpoints: &[String],
729
+ query: &str,
730
+ max_results: usize,
731
+ ) -> Vec<SearchProviderReport> {
732
+ let searx_future = searx_search_broker(
733
+ endpoints,
734
+ query,
735
+ (max_results * 3).min(MAX_PROVIDER_RESULTS),
736
+ MAX_SEARX_PARALLEL_ENDPOINTS,
737
+ );
738
+ let wiki_future = wikipedia_search(query, (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS));
739
+ let news_future = google_news_search(query, (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS));
740
+ let brave_future = brave_html_search(query, (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS));
741
+ let duckduckgo_future =
742
+ duckduckgo_html_search(query, (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS));
743
+ let bing_future = bing_rss_search(query, (max_results / 2).max(3).min(MAX_PROVIDER_RESULTS));
744
+
745
+ let (searx, wiki, news, brave, duckduckgo, bing) = tokio::join!(
746
+ searx_future,
747
+ wiki_future,
748
+ news_future,
749
+ brave_future,
750
+ duckduckgo_future,
751
+ bing_future
752
+ );
753
+
754
+ vec![
755
+ as_provider_report("searx", searx),
756
+ as_provider_report("wikipedia", wiki),
757
+ as_provider_report("news-rss", news),
758
+ as_provider_report("brave-html", brave),
759
+ as_provider_report("duckduckgo", duckduckgo),
760
+ as_provider_report("bing-rss", bing),
761
+ ]
762
+ }
763
+
764
/// Reduces an instruction-style prompt to a keyword query.
///
/// Primary pass: lowercase each word, trim surrounding punctuation, drop
/// anything shorter than 3 chars or in the Italian/English stopword list,
/// and keep at most 10 keywords. If fewer than 3 survive, fall back to the
/// first 8 punctuation-trimmed words of length >= 3 (original casing, no
/// stopword filtering). Returns `None` only when even the fallback is empty.
fn relax_search_query(query: &str) -> Option<String> {
    const STOPWORDS: &[&str] = &[
        "fai", "ricerca", "ricercare", "cerca", "cercare", "correlate", "correlato", "analizza",
        "analisi", "eventuali", "possibili", "breve", "riassunto", "sintesi", "su", "sul",
        "sulla", "sulle", "con", "per", "tra", "fra", "del", "della", "delle", "degli", "dei",
        "e", "ed", "in", "di", "da", "a", "il", "lo", "la", "gli", "le", "the", "and", "for",
        "with", "from", "into",
    ];
    let stopwords: HashSet<&str> = STOPWORDS.iter().copied().collect();

    let trim_punct = |c: char| !c.is_alphanumeric() && c != '_' && c != '-';

    let keywords: Vec<String> = query
        .split_whitespace()
        .map(|raw| raw.trim_matches(trim_punct).to_lowercase())
        .filter(|w| w.len() >= 3 && !stopwords.contains(w.as_str()))
        .take(10)
        .collect();

    if keywords.len() >= 3 {
        return Some(keywords.join(" "));
    }

    // Fallback: too few keywords survived filtering; keep original casing
    // and skip the stopword check entirely.
    let fallback: Vec<&str> = query
        .split_whitespace()
        .map(|s| s.trim_matches(trim_punct))
        .filter(|s| s.len() >= 3)
        .take(8)
        .collect();

    if fallback.is_empty() {
        None
    } else {
        Some(fallback.join(" "))
    }
}
846
+
847
/// Builds a topic-focused keyword query for geopolitical/news-style prompts.
///
/// Tokens are the prompt's words, punctuation-trimmed, lowercased, length >= 3.
/// Priority terms present among the tokens are selected first, in the fixed
/// priority order, capped at 7. If fewer than 3 priority terms match, the
/// selection is topped up with the remaining tokens in prompt order (still
/// capped at 7, no duplicates). Returns `None` when no usable tokens exist.
fn topic_focus_query(query: &str) -> Option<String> {
    const PRIORITY: &[&str] = &[
        "israele", "israel", "gaza", "hamas", "guerra", "war", "iran", "libano", "hezbollah",
        "blocco", "blocchi", "navale", "navali", "mar", "mare", "italia", "italy", "europa",
        "europe",
    ];

    let tokens: Vec<String> = query
        .split_whitespace()
        .map(|raw| {
            raw.trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
                .to_lowercase()
        })
        .filter(|t| t.len() >= 3)
        .collect();

    if tokens.is_empty() {
        return None;
    }

    let mut selected: Vec<String> = PRIORITY
        .iter()
        .filter(|p| tokens.iter().any(|t| t == *p))
        .take(7)
        .map(|p| p.to_string())
        .collect();

    if selected.len() < 3 {
        for t in tokens {
            if selected.len() >= 7 {
                break;
            }
            if !selected.contains(&t) {
                selected.push(t);
            }
        }
    }

    if selected.is_empty() {
        None
    } else {
        Some(selected.join(" "))
    }
}
910
+
911
+ fn as_provider_report(
912
+ provider: &'static str,
913
+ result: Result<Vec<SearchResult>>,
914
+ ) -> SearchProviderReport {
915
+ match result {
916
+ Ok(items) => SearchProviderReport {
917
+ provider,
918
+ items,
919
+ error: None,
920
+ },
921
+ Err(e) => SearchProviderReport {
922
+ provider,
923
+ items: Vec::new(),
924
+ error: Some(e.to_string()),
925
+ },
926
+ }
927
+ }
928
+
929
/// Deduplicates hits by normalized URL, scores each (provider weight,
/// metadata richness, query overlap, per-domain diversity penalty), sorts by
/// descending score, drops zero-overlap hits, and truncates to `max_results`.
fn rank_and_dedup(
    results: Vec<SearchResult>,
    max_results: usize,
    query: &str,
) -> Vec<SearchResult> {
    let query_tokens = query_tokens(query);
    let mut deduped = Vec::with_capacity(results.len());
    let mut seen_urls = HashSet::new();

    // Pass 1: drop empty/duplicate URLs and backfill the source domain.
    for mut item in results {
        if item.url.trim().is_empty() {
            continue;
        }
        let key = normalize_url_key(&item.url);
        if key.is_empty() || !seen_urls.insert(key) {
            continue;
        }
        if item.source_domain.trim().is_empty() {
            item.source_domain = source_domain(&item.url);
        }
        deduped.push(item);
    }

    // Pass 2: score in list order. Later hits from an already-seen domain
    // are penalized, so iteration order matters here.
    let mut domain_counts: HashMap<String, u32> = HashMap::new();
    for item in &mut deduped {
        let domain = if item.source_domain.trim().is_empty() {
            "unknown".to_string()
        } else {
            item.source_domain.clone()
        };

        let count = *domain_counts.get(&domain).unwrap_or(&0);
        let mut score = 1.0;

        // Base trust weight per provider.
        score += match item.provider.as_str() {
            p if p.starts_with("searx") => 0.9,
            p if p.starts_with("brave-html") => 0.82,
            "wikipedia" => 0.75,
            p if p.starts_with("news-rss") => 0.7,
            p if p.starts_with("duckduckgo") => 0.65,
            p if p.starts_with("bing-rss") => 0.55,
            p if p.starts_with("direct-probe") => 0.62,
            _ => 0.5,
        };

        // Bonuses for richer metadata.
        if !item.engine.trim().is_empty() {
            score += 0.15;
        }
        if item.content.len() > 80 {
            score += 0.15;
        }
        // Query-relevance bonus (0.0..=0.9); near-zero overlap is also penalized.
        let combined = format!("{} {} {}", item.title, item.content, item.url);
        let relevance = query_overlap_score(&combined, &query_tokens);
        score += relevance;
        if relevance <= 0.01 {
            score -= 0.3;
        }
        // Diversity: each additional hit from the same domain loses 0.18.
        score -= 0.18 * count as f64;
        if domain == "unknown" {
            score -= 0.1;
        }

        // Round to two decimals for stable display/serialization.
        item.score = Some((score * 100.0).round() / 100.0);
        domain_counts.insert(domain, count + 1);
    }

    // Pass 3: descending score, tie-broken by domain then title for
    // deterministic output.
    deduped.sort_by(|a, b| {
        let sb = b.score.unwrap_or(0.0);
        let sa = a.score.unwrap_or(0.0);
        sb.partial_cmp(&sa)
            .unwrap_or(Ordering::Equal)
            .then_with(|| a.source_domain.cmp(&b.source_domain))
            .then_with(|| a.title.cmp(&b.title))
    });

    // Pass 4: when the query produced tokens, require at least some overlap.
    if !query_tokens.is_empty() {
        deduped.retain(|item| {
            let combined = format!("{} {} {}", item.title, item.content, item.url);
            query_overlap_score(&combined, &query_tokens) > 0.01
        });
    }

    deduped.truncate(max_results);
    deduped
}
1014
+
1015
/// Extracts up to 12 unique lowercase tokens (length >= 3) from `query`,
/// preserving first-occurrence order; punctuation is trimmed from both ends
/// of each word before filtering.
fn query_tokens(query: &str) -> Vec<String> {
    let mut tokens: Vec<String> = Vec::new();
    for word in query.split_whitespace() {
        if tokens.len() >= 12 {
            break;
        }
        let normalized = word
            .trim_matches(|c: char| !c.is_alphanumeric() && c != '_' && c != '-')
            .to_lowercase();
        if normalized.len() >= 3 && !tokens.contains(&normalized) {
            tokens.push(normalized);
        }
    }
    tokens
}
1033
+
1034
/// Fraction of query tokens found (case-insensitively, as substrings) in
/// `text`, scaled into 0.0..=0.9. Returns 0.0 for an empty token list.
fn query_overlap_score(text: &str, query_tokens: &[String]) -> f64 {
    if query_tokens.is_empty() {
        return 0.0;
    }
    let lower = text.to_lowercase();
    let hits = query_tokens
        .iter()
        .filter(|token| lower.contains(token.as_str()))
        .count();
    (hits as f64 / query_tokens.len() as f64) * 0.9
}
1047
+
1048
/// Queries up to `max_parallel_endpoints` of the best-ranked SearXNG
/// endpoints concurrently and merges their results.
///
/// Success/failure marks feed the shared endpoint-health store so flaky
/// instances get deprioritized by `rank_endpoints` on later calls. If the
/// parallel wave yields nothing, remaining endpoints are probed sequentially
/// (stopping at the first that produces results) before giving up with an
/// aggregated error.
async fn searx_search_broker(
    endpoints: &[String],
    query: &str,
    max_results_per_endpoint: usize,
    max_parallel_endpoints: usize,
) -> Result<Vec<SearchResult>> {
    let ordered = rank_endpoints(endpoints).await;
    // At least 1, never more than the number of known endpoints.
    let cap = max_parallel_endpoints.max(1).min(ordered.len().max(1));
    let selected: Vec<String> = ordered.iter().take(cap).cloned().collect();

    // Fan out one task per selected endpoint; each owns its endpoint string
    // and a clone of the query.
    let mut tasks = Vec::with_capacity(selected.len());
    for endpoint in selected {
        let q = query.to_string();
        tasks.push(tokio::spawn(async move {
            let out = web_search_single_endpoint(&endpoint, &q, max_results_per_endpoint).await;
            (endpoint, out)
        }));
    }

    let mut merged = Vec::new();
    let mut errors = Vec::new();

    for task in tasks {
        match task.await {
            Ok((endpoint, Ok(results))) => {
                mark_endpoint_success(&endpoint).await;
                merged.extend(results);
            }
            Ok((endpoint, Err(err))) => {
                mark_endpoint_failure(&endpoint, &err.to_string()).await;
                errors.push(format!("{} -> {}", endpoint, err));
            }
            Err(err) => {
                // The spawned task panicked or was cancelled; record and continue.
                errors.push(format!("join error: {}", err));
            }
        }
    }

    if merged.is_empty() {
        // Fallback pass: if caller provided more endpoints than current parallel cap,
        // probe the remaining ones sequentially before failing.
        for endpoint in ordered.iter().skip(cap).cloned() {
            match web_search_single_endpoint(&endpoint, query, max_results_per_endpoint).await {
                Ok(results) => {
                    mark_endpoint_success(&endpoint).await;
                    merged.extend(results);
                    if !merged.is_empty() {
                        break;
                    }
                }
                Err(err) => {
                    mark_endpoint_failure(&endpoint, &err.to_string()).await;
                    errors.push(format!("{} -> {}", endpoint, err));
                }
            }
        }
    }

    if merged.is_empty() {
        let details = if errors.is_empty() {
            "unknown failure".to_string()
        } else {
            errors.join(" | ")
        };
        return Err(anyhow!("all searx endpoints failed: {}", details));
    }

    Ok(merged)
}
1117
+
1118
/// Deduplicate, normalize (trim + strip trailing '/'), and order endpoints by
/// recorded health: score = successes*2 - failures, with a 20-point penalty
/// while an endpoint's failure cooldown is still active. Highest score first.
async fn rank_endpoints(endpoints: &[String]) -> Vec<String> {
    let mut unique = Vec::new();
    let mut seen = HashSet::new();
    for raw in endpoints {
        let normalized = raw.trim().trim_end_matches('/').to_string();
        if normalized.is_empty() {
            continue;
        }
        if seen.insert(normalized.clone()) {
            unique.push(normalized);
        }
    }

    let now = now_unix_secs();
    let map = endpoint_health_store().lock().await;

    let mut ranked: Vec<(String, i64)> = unique
        .into_iter()
        .map(|endpoint| {
            // Unknown endpoints get default (zeroed) stats, i.e. a neutral score.
            let stat = map.get(&endpoint).cloned().unwrap_or_default();
            let cooldown_penalty = if stat.cooldown_until > now { 20 } else { 0 };
            let score = (stat.successes as i64 * 2) - (stat.failures as i64) - cooldown_penalty;
            (endpoint, score)
        })
        .collect();

    // Descending by score; ties keep the dedup order from above.
    ranked.sort_by(|a, b| b.1.cmp(&a.1));
    ranked.into_iter().map(|x| x.0).collect()
}
1147
+
1148
/// Record a successful request against `endpoint`: bump its success counter
/// and clear any active cooldown and remembered error.
async fn mark_endpoint_success(endpoint: &str) {
    let mut map = endpoint_health_store().lock().await;
    let entry = map.entry(endpoint.to_string()).or_default();
    entry.successes = entry.successes.saturating_add(1);
    entry.cooldown_until = 0;
    entry.last_error = None;
}
1155
+
1156
/// Record a failed request against `endpoint`: bump its failure counter,
/// remember the error text, and start a cooldown window (120 s when the error
/// mentions "429", i.e. rate limiting, else 30 s).
async fn mark_endpoint_failure(endpoint: &str, error: &str) {
    let mut map = endpoint_health_store().lock().await;
    let entry = map.entry(endpoint.to_string()).or_default();
    entry.failures = entry.failures.saturating_add(1);
    let lower = error.to_lowercase();
    // NOTE(review): substring match on "429" is a heuristic — it would also
    // fire on e.g. a port number containing 429; acceptable for cooldown tuning.
    let cooldown = if lower.contains("429") { 120 } else { 30 };
    entry.cooldown_until = now_unix_secs().saturating_add(cooldown);
    entry.last_error = Some(error.to_string());
}
1165
+
1166
/// Query one SearXNG endpoint's JSON API, with bounded retries.
///
/// Retries up to SEARX_RETRIES times on transport errors and on retryable
/// HTTP statuses (see `should_retry_status`), sleeping `backoff_for_attempt`
/// between tries. On success, results are filtered of empty URLs, truncated
/// to `max_results` (capped at MAX_PROVIDER_RESULTS), and mapped into
/// `SearchResult` values tagged with this endpoint.
async fn web_search_single_endpoint(
    endpoint: &str,
    query: &str,
    max_results: usize,
) -> Result<Vec<SearchResult>> {
    let base = endpoint.trim_end_matches('/');
    let url = format!(
        "{}/search?q={}&format=json&language=en-US",
        base,
        url_encode(query)
    );
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} searx", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;

    let mut last_error: Option<String> = None;

    for attempt in 1..=SEARX_RETRIES {
        match client
            .get(&url)
            .header("Accept", "application/json")
            .header("Accept-Language", "en-US,en;q=0.8")
            .send()
            .await
        {
            Ok(response) => {
                let status = response.status();
                if status.is_success() {
                    // A JSON parse failure is not retried: it typically means the
                    // instance has the JSON format disabled, not a transient fault.
                    let mut parsed: SearxResponse = response
                        .json()
                        .await
                        .map_err(|e| anyhow!("Invalid SearXNG JSON from {}: {}", endpoint, e))?;
                    parsed.results.retain(|r| !r.url.trim().is_empty());
                    parsed
                        .results
                        .truncate(max_results.min(MAX_PROVIDER_RESULTS));

                    let provider = format!("searx:{}", endpoint_host(endpoint));
                    let mapped = parsed
                        .results
                        .into_iter()
                        .map(|item| SearchResult {
                            title: item.title,
                            url: item.url.clone(),
                            content: item.content,
                            engine: item.engine,
                            provider: provider.clone(),
                            source_domain: source_domain(&item.url),
                            score: None,
                            endpoint: Some(endpoint.to_string()),
                        })
                        .collect::<Vec<_>>();
                    return Ok(mapped);
                }

                // Non-success status: keep a short body snippet for diagnostics.
                let body = response.text().await.unwrap_or_default();
                let snippet = summarize_for_error(&body);
                let err = format!(
                    "SearX endpoint {} HTTP {} (attempt {}/{}): {}",
                    endpoint, status, attempt, SEARX_RETRIES, snippet
                );

                if should_retry_status(status) && attempt < SEARX_RETRIES {
                    last_error = Some(err);
                    tokio::time::sleep(backoff_for_attempt(attempt, status.as_u16())).await;
                    continue;
                }

                return Err(anyhow!(err));
            }
            Err(e) => {
                let err = format!(
                    "SearX endpoint {} request error (attempt {}/{}): {}",
                    endpoint, attempt, SEARX_RETRIES, e
                );
                if attempt < SEARX_RETRIES {
                    last_error = Some(err);
                    // Status 0 = transport-level failure; uses the default backoff.
                    tokio::time::sleep(backoff_for_attempt(attempt, 0)).await;
                    continue;
                }
                return Err(anyhow!(err));
            }
        }
    }

    // Defensive: only reachable if the retry loop exits without returning.
    Err(anyhow!(
        "SearX endpoint {} failed: {}",
        endpoint,
        last_error.unwrap_or_else(|| "unknown error".to_string())
    ))
}
1258
+
1259
+ fn should_retry_status(status: reqwest::StatusCode) -> bool {
1260
+ matches!(
1261
+ status.as_u16(),
1262
+ 429 | 500 | 502 | 503 | 504 | 520 | 521 | 522 | 523 | 524
1263
+ )
1264
+ }
1265
+
1266
/// Exponential backoff delay for retry `attempt` (1-based), tuned by status.
///
/// Base delay: 1200 ms after 429 (rate limit), 700 ms after common 5xx,
/// otherwise 500 ms. The base is doubled per prior attempt with the
/// multiplier capped at 6x, so the worst case stays bounded.
fn backoff_for_attempt(attempt: usize, status: u16) -> Duration {
    let base_ms: u64 = match status {
        429 => 1200,
        500 | 502 | 503 | 504 => 700,
        _ => 500,
    };
    let factor = attempt.saturating_sub(1).min(u32::MAX as usize) as u32;
    // saturating_pow: plain 2_u64.pow(factor) overflows (panicking in debug
    // builds) once factor >= 64; saturation keeps any attempt count safe and
    // the .min(6) cap preserves the original delay schedule.
    let multiplier = 2_u64.saturating_pow(factor).min(6);
    Duration::from_millis(base_ms.saturating_mul(multiplier))
}
1275
+
1276
+ fn endpoint_host(endpoint: &str) -> String {
1277
+ reqwest::Url::parse(endpoint)
1278
+ .ok()
1279
+ .and_then(|u| u.host_str().map(|s| s.to_string()))
1280
+ .unwrap_or_else(|| "unknown".to_string())
1281
+ }
1282
+
1283
/// Search English Wikipedia via the MediaWiki `list=search` API.
///
/// The request asks for `max_results` clamped to 1..=20; results are further
/// capped at MAX_PROVIDER_RESULTS. Snippet HTML is stripped and entities
/// decoded. Errors on non-success HTTP status or invalid JSON.
async fn wikipedia_search(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} wikipedia", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;

    let url = format!(
        "https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={}&utf8=&format=json&srlimit={}",
        url_encode(query),
        max_results.min(20).max(1)
    );

    let response = client
        .get(&url)
        .header("Accept", "application/json")
        .send()
        .await?;

    if !response.status().is_success() {
        return Err(anyhow!(
            "wikipedia provider failed: HTTP {}",
            response.status()
        ));
    }

    let parsed: WikipediaResponse = response.json().await?;
    let entries = parsed
        .query
        .map(|q| q.search)
        .unwrap_or_default()
        .into_iter()
        // pageid 0 would produce a broken permalink; skip such entries.
        .filter(|item| item.pageid > 0)
        .take(max_results.min(MAX_PROVIDER_RESULTS))
        .map(|item| {
            // Permalink by page id — stable and avoids title escaping.
            let page_url = format!("https://en.wikipedia.org/?curid={}", item.pageid);
            SearchResult {
                title: item.title,
                url: page_url.clone(),
                content: strip_html_tags(&decode_xml_entities(&item.snippet)),
                engine: "wikipedia-api".to_string(),
                provider: "wikipedia".to_string(),
                source_domain: source_domain(&page_url),
                score: None,
                endpoint: None,
            }
        })
        .collect::<Vec<_>>();

    Ok(entries)
}
1333
+
1334
/// Query Google News RSS across every configured region in parallel and merge
/// the per-region results. Fails only when all regions fail.
async fn google_news_search(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    // Oversample 2x split evenly across regions (min 2 each, provider-capped)
    // so post-merge dedup/trim still has enough material.
    let per_region = ((max_results.saturating_mul(2)) / NEWS_REGIONS.len())
        .max(2)
        .min(MAX_PROVIDER_RESULTS);

    let mut tasks = Vec::with_capacity(NEWS_REGIONS.len());
    for (country, hl, ceid) in NEWS_REGIONS {
        let q = query.to_string();
        let country = *country;
        let hl = *hl;
        let ceid = *ceid;
        tasks.push(tokio::spawn(async move {
            let out = google_news_region_search(&q, country, hl, ceid, per_region).await;
            (country, out)
        }));
    }

    let mut merged = Vec::new();
    let mut errors = Vec::new();
    for task in tasks {
        match task.await {
            Ok((_, Ok(items))) => merged.extend(items),
            Ok((country, Err(err))) => errors.push(format!("{}: {}", country, err)),
            Err(err) => errors.push(format!("join error: {}", err)),
        }
    }

    if merged.is_empty() {
        if errors.is_empty() {
            Err(anyhow!("news-rss provider returned no results"))
        } else {
            Err(anyhow!(
                "news-rss providers failed for query '{}': {}",
                query,
                errors.join(" | ")
            ))
        }
    } else {
        Ok(merged)
    }
}
1375
+
1376
/// Fetch Google News RSS results for one region (country / UI language `hl` /
/// `ceid` edition triple). Falls back to Atom parsing when the body yields no
/// RSS `<item>`s. Errors on non-success HTTP status.
async fn google_news_region_search(
    query: &str,
    country: &str,
    hl: &str,
    ceid: &str,
    max_results: usize,
) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} news-rss", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;

    let url = format!(
        "https://news.google.com/rss/search?q={}&hl={}&gl={}&ceid={}",
        url_encode(query),
        url_encode(hl),
        url_encode(country),
        url_encode(ceid)
    );

    let response = client.get(&url).send().await?;
    if !response.status().is_success() {
        return Err(anyhow!(
            "news-rss provider {} failed: HTTP {}",
            country,
            response.status()
        ));
    }

    let body = response.text().await?;
    let limit = max_results.min(MAX_PROVIDER_RESULTS).max(1);
    let mut items = parse_rss_items(&body, limit);

    // Some feeds serve Atom; retry with the Atom parser if RSS found nothing.
    if items.is_empty() {
        items = parse_atom_entries(&body, limit);
    }

    let engine = format!("google-news-rss-{}", country.to_lowercase());
    let provider = format!("news-rss:{}", country);

    Ok(items
        .into_iter()
        .map(|item| SearchResult {
            title: item.title,
            url: item.link.clone(),
            content: item.description,
            engine: engine.clone(),
            provider: provider.clone(),
            source_domain: source_domain(&item.link),
            score: None,
            endpoint: None,
        })
        .collect::<Vec<_>>())
}
1430
+
1431
/// Scrape the Brave Search HTML results page (no API key required).
///
/// Parses `div.snippet[data-type="web"]` result blocks; only absolute
/// http(s) links are kept. Title falls back to the link's own text, then to
/// a placeholder. Errors on non-success HTTP or when no results parse —
/// NOTE(review): the CSS selectors mirror Brave's current markup and will
/// silently break if the page structure changes; verify periodically.
async fn brave_html_search(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::builder()
        .user_agent(BROWSER_USER_AGENT)
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;

    let url = format!(
        "https://search.brave.com/search?q={}&source=web",
        url_encode(query)
    );
    let response = client
        .get(&url)
        .header("Accept-Language", "en-US,en;q=0.8")
        .header("Accept-Encoding", "gzip, deflate")
        .send()
        .await?;
    if !response.status().is_success() {
        return Err(anyhow!(
            "brave-html provider failed: HTTP {}",
            response.status()
        ));
    }

    let body = response.text().await?;
    let doc = Html::parse_document(&body);
    let snippet_selector = Selector::parse("div.snippet[data-type=\"web\"]")
        .map_err(|e| anyhow!("brave snippet selector error: {}", e))?;
    let link_selector =
        Selector::parse("a.l1[href]").map_err(|e| anyhow!("brave link selector error: {}", e))?;
    let title_selector =
        Selector::parse("div.title").map_err(|e| anyhow!("brave title selector error: {}", e))?;
    let desc_selector = Selector::parse("div.snippet-description")
        .map_err(|e| anyhow!("brave description selector error: {}", e))?;

    let mut results = Vec::new();
    let limit = max_results.min(MAX_PROVIDER_RESULTS).max(1);
    for snippet in doc.select(&snippet_selector) {
        if results.len() >= limit {
            break;
        }
        // A result block without a link is unusable; skip it.
        let Some(link) = snippet.select(&link_selector).next() else {
            continue;
        };
        let href = link.value().attr("href").unwrap_or("").trim();
        if !(href.starts_with("http://") || href.starts_with("https://")) {
            continue;
        }

        let title = snippet
            .select(&title_selector)
            .next()
            .map(|n| n.text().collect::<Vec<_>>().join(" ").trim().to_string())
            .filter(|v| !v.is_empty())
            .or_else(|| {
                // Fall back to the anchor's own text when the title div is empty.
                let fallback = link.text().collect::<Vec<_>>().join(" ").trim().to_string();
                if fallback.is_empty() {
                    None
                } else {
                    Some(fallback)
                }
            })
            .unwrap_or_else(|| "Result from Brave Search".to_string());

        let content = snippet
            .select(&desc_selector)
            .next()
            .map(|n| n.text().collect::<Vec<_>>().join(" ").trim().to_string())
            .unwrap_or_default();

        results.push(SearchResult {
            title,
            url: href.to_string(),
            content,
            engine: "brave-html".to_string(),
            provider: "brave-html".to_string(),
            source_domain: source_domain(href),
            score: None,
            endpoint: Some("https://search.brave.com/search".to_string()),
        });
    }

    if results.is_empty() {
        Err(anyhow!("brave-html provider returned no results"))
    } else {
        Ok(results)
    }
}
1518
+
1519
/// Search DuckDuckGo without an API key, with three fallback tiers:
/// 1. the `lite.duckduckgo.com` endpoint (most stable headless),
/// 2. the classic `/html` endpoint parsed via `a.result__a`,
/// 3. a permissive any-anchor scrape of the `/html` page.
///
/// Redirect hrefs are resolved through `normalize_duckduckgo_href`. Errors
/// only when all tiers produce nothing.
async fn duckduckgo_html_search(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} duckduckgo", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;
    let target = max_results.min(MAX_PROVIDER_RESULTS).max(1);

    // Preferred path: DuckDuckGo lite endpoint is more stable than /html in headless contexts.
    let lite_url = format!(
        "https://lite.duckduckgo.com/lite/?q={}&kl=wt-wt",
        url_encode(query)
    );
    // Any lite-tier failure falls through silently to the /html tier below.
    if let Ok(response) = client
        .get(&lite_url)
        .header("User-Agent", BROWSER_USER_AGENT)
        .header("Accept-Language", "en-US,en;q=0.8")
        .send()
        .await
    {
        if response.status().is_success() {
            let body = response.text().await.unwrap_or_default();
            let doc = Html::parse_document(&body);
            let title_selector = Selector::parse("a.result-link")
                .map_err(|e| anyhow!("duckduckgo lite selector error: {}", e))?;
            let snippet_selector = Selector::parse("td.result-snippet")
                .map_err(|e| anyhow!("duckduckgo lite snippet selector error: {}", e))?;
            let snippets = doc
                .select(&snippet_selector)
                .map(|n| n.text().collect::<Vec<_>>().join(" ").trim().to_string())
                .collect::<Vec<_>>();

            let mut results = Vec::new();
            for (idx, element) in doc.select(&title_selector).enumerate() {
                if results.len() >= target {
                    break;
                }
                let raw_href = element.value().attr("href").unwrap_or("").trim();
                let url = normalize_duckduckgo_href(raw_href);
                if url.is_empty() {
                    continue;
                }
                let title = element
                    .text()
                    .collect::<Vec<_>>()
                    .join(" ")
                    .trim()
                    .to_string();
                if title.is_empty() {
                    continue;
                }
                // NOTE(review): snippets are paired to links by document order
                // (one snippet cell per result row assumed) — verify against
                // the lite markup if pairing ever looks off.
                let content = snippets.get(idx).cloned().unwrap_or_default();
                results.push(SearchResult {
                    title,
                    url: url.clone(),
                    content,
                    engine: "duckduckgo-lite".to_string(),
                    provider: "duckduckgo".to_string(),
                    source_domain: source_domain(&url),
                    score: None,
                    endpoint: Some("https://lite.duckduckgo.com/lite".to_string()),
                });
            }
            if !results.is_empty() {
                return Ok(results);
            }
        }
    }

    // Second tier: the classic /html endpoint.
    let url = format!(
        "https://duckduckgo.com/html/?q={}&kl=wt-wt",
        url_encode(query)
    );
    let response = client
        .get(&url)
        .header("User-Agent", BROWSER_USER_AGENT)
        .header("Accept-Language", "en-US,en;q=0.8")
        .send()
        .await?;
    if !response.status().is_success() {
        return Err(anyhow!(
            "duckduckgo provider failed: HTTP {}",
            response.status()
        ));
    }

    let body = response.text().await?;
    let doc = Html::parse_document(&body);
    let title_selector =
        Selector::parse("a.result__a").map_err(|e| anyhow!("duckduckgo selector error: {}", e))?;
    let snippet_selector = Selector::parse(".result__snippet")
        .map_err(|e| anyhow!("duckduckgo snippet selector error: {}", e))?;
    let fallback_link_selector =
        Selector::parse("a[href]").map_err(|e| anyhow!("duckduckgo fallback selector: {}", e))?;

    let mut results = Vec::new();

    for element in doc.select(&title_selector) {
        if results.len() >= target {
            break;
        }
        let raw_href = element.value().attr("href").unwrap_or("").trim();
        let url = normalize_duckduckgo_href(raw_href);
        if url.is_empty() {
            continue;
        }
        let title = element
            .text()
            .collect::<Vec<_>>()
            .join(" ")
            .trim()
            .to_string();
        if title.is_empty() {
            continue;
        }

        // The snippet lives in a sibling under the shared parent of the link.
        let content = element
            .parent()
            .and_then(scraper::ElementRef::wrap)
            .and_then(|parent| parent.select(&snippet_selector).next())
            .map(|n| n.text().collect::<Vec<_>>().join(" ").trim().to_string())
            .unwrap_or_default();

        results.push(SearchResult {
            title,
            url: url.clone(),
            content,
            engine: "duckduckgo-html".to_string(),
            provider: "duckduckgo".to_string(),
            source_domain: source_domain(&url),
            score: None,
            endpoint: Some("https://duckduckgo.com/html".to_string()),
        });
    }

    // Third tier: permissive scrape — any anchor with a resolvable href and a
    // title of at least 12 characters (filters nav/chrome links).
    if results.is_empty() {
        for link in doc.select(&fallback_link_selector) {
            if results.len() >= target {
                break;
            }
            let raw_href = link.value().attr("href").unwrap_or("").trim();
            let url = normalize_duckduckgo_href(raw_href);
            if url.is_empty() {
                continue;
            }
            let title = link.text().collect::<Vec<_>>().join(" ").trim().to_string();
            if title.len() < 12 {
                continue;
            }
            results.push(SearchResult {
                title,
                url: url.clone(),
                content: String::new(),
                engine: "duckduckgo-html-fallback".to_string(),
                provider: "duckduckgo".to_string(),
                source_domain: source_domain(&url),
                score: None,
                endpoint: Some("https://duckduckgo.com/html".to_string()),
            });
        }
    }

    if results.is_empty() {
        Err(anyhow!("duckduckgo provider returned no results"))
    } else {
        Ok(results)
    }
}
1686
+
1687
/// Resolve a DuckDuckGo result href to an absolute external URL.
///
/// Absolute http(s) links pass through unchanged. DuckDuckGo redirect links
/// (`/l/?...` or `//duckduckgo.com/l/?...`) are resolved by extracting their
/// `uddg` query parameter (the percent-decoded target URL). Anything else —
/// including a redirect whose `uddg` is missing or not http(s) — yields "".
fn normalize_duckduckgo_href(raw_href: &str) -> String {
    let href = raw_href.trim();
    if href.is_empty() {
        return String::new();
    }
    if href.starts_with("http://") || href.starts_with("https://") {
        return href.to_string();
    }
    if href.starts_with("/l/?") || href.starts_with("//duckduckgo.com/l/?") {
        // Build a canonical absolute URL so reqwest::Url can parse the query.
        let canonical = if let Some(stripped) = href.strip_prefix("//duckduckgo.com") {
            format!("https://duckduckgo.com{}", stripped)
        } else {
            format!("https://duckduckgo.com{}", href)
        };
        if let Ok(url) = reqwest::Url::parse(&canonical) {
            for (k, v) in url.query_pairs() {
                if k == "uddg" {
                    // query_pairs() percent-decodes the redirect target for us.
                    let out = v.to_string();
                    if out.starts_with("http://") || out.starts_with("https://") {
                        return out;
                    }
                }
            }
        }
    }
    String::new()
}
1714
+
1715
/// Query Bing's RSS output for web results (format=rss, en-US locale).
/// Falls back to Atom parsing if no RSS `<item>`s parse; errors on a
/// non-success status or when neither parser finds anything.
async fn bing_rss_search(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} bing-rss", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;
    let url = format!(
        "https://www.bing.com/search?q={}&format=rss&setlang=en-US&cc=us",
        url_encode(query)
    );

    let response = client.get(&url).send().await?;
    if !response.status().is_success() {
        return Err(anyhow!(
            "bing-rss provider failed: HTTP {}",
            response.status()
        ));
    }

    let body = response.text().await?;
    let limit = max_results.min(MAX_PROVIDER_RESULTS).max(1);
    let mut items = parse_rss_items(&body, limit);
    if items.is_empty() {
        items = parse_atom_entries(&body, limit);
    }
    if items.is_empty() {
        return Err(anyhow!("bing-rss provider returned no results"));
    }

    Ok(items
        .into_iter()
        .map(|item| SearchResult {
            title: item.title,
            url: item.link.clone(),
            content: item.description,
            engine: "bing-rss".to_string(),
            provider: "bing-rss".to_string(),
            source_domain: source_domain(&item.link),
            score: None,
            endpoint: Some("https://www.bing.com/search?format=rss".to_string()),
        })
        .collect::<Vec<_>>())
}
1757
+
1758
/// Last-resort discovery heuristic: guess `https://<stem>.<tld>` domains from
/// the query's leading tokens and probe them directly, returning a title and
/// text snippet for each domain that responds successfully.
///
/// At most min(max_results, 3) results are produced; probe failures are
/// silently skipped. Returns Ok(empty) when no usable tokens exist.
async fn direct_domain_probe(query: &str, max_results: usize) -> Result<Vec<SearchResult>> {
    let mut tokens = query
        .split_whitespace()
        .map(|raw| {
            raw.trim_matches(|c: char| !c.is_alphanumeric() && c != '-' && c != '_')
                .to_lowercase()
        })
        // Keep only label-safe tokens (ASCII alphanumerics and '-').
        .filter(|t| t.len() >= 3 && t.chars().all(|c| c.is_ascii_alphanumeric() || c == '-'))
        .collect::<Vec<_>>();
    // NOTE(review): Vec::dedup only collapses *adjacent* duplicates —
    // harmless here since only the first one or two tokens are used.
    tokens.dedup();
    if tokens.is_empty() {
        return Ok(Vec::new());
    }

    // Candidate stems: the first token, plus the first two concatenated.
    let mut stems = Vec::new();
    stems.push(tokens[0].clone());
    if tokens.len() >= 2 {
        stems.push(format!("{}{}", tokens[0], tokens[1]));
    }
    stems.truncate(2);

    let tlds = ["com", "org", "net", "io", "it"];
    let mut candidates = Vec::new();
    for stem in stems {
        for tld in tlds {
            candidates.push(format!("https://{}.{}", stem, tld));
        }
    }

    // Shorter timeout than the search providers: these are blind guesses.
    let client = reqwest::Client::builder()
        .user_agent(BROWSER_USER_AGENT)
        .timeout(Duration::from_secs(8))
        .build()?;

    let mut out = Vec::new();
    for url in candidates {
        if out.len() >= max_results.min(3) {
            break;
        }
        let response = match client.get(&url).send().await {
            Ok(r) => r,
            Err(_) => continue,
        };
        if !response.status().is_success() {
            continue;
        }
        let body = response.text().await.unwrap_or_default();
        let title = extract_html_title(&body).unwrap_or_else(|| url.clone());
        let snippet = extract_text_snippet_from_html(&body, 260);
        out.push(SearchResult {
            title,
            url: url.clone(),
            content: snippet,
            engine: "direct-domain-probe".to_string(),
            provider: "direct-probe".to_string(),
            source_domain: source_domain(&url),
            score: None,
            endpoint: None,
        });
    }

    Ok(out)
}
1821
+
1822
+ fn extract_html_title(html: &str) -> Option<String> {
1823
+ let lower = html.to_lowercase();
1824
+ let start = lower.find("<title>")?;
1825
+ let end = lower[start + 7..].find("</title>")?;
1826
+ let raw = &html[start + 7..start + 7 + end];
1827
+ let title = decode_xml_entities(raw).trim().to_string();
1828
+ if title.is_empty() {
1829
+ None
1830
+ } else {
1831
+ Some(title)
1832
+ }
1833
+ }
1834
+
1835
/// Extract a readable plain-text snippet from an HTML document.
///
/// Content-bearing selectors are tried in priority order (article, main,
/// .content, #content, body); within one selector the longest matching
/// element text wins, and the scan stops at the first selector that produced
/// any text. The result is whitespace-collapsed and truncated to `max_chars`
/// via `truncate_text`. Empty input yields "".
fn extract_text_snippet_from_html(html: &str, max_chars: usize) -> String {
    if html.trim().is_empty() {
        return String::new();
    }
    let document = Html::parse_document(html);
    let selectors = ["article", "main", ".content", "#content", "body"];
    let mut best = String::new();
    for selector_str in selectors {
        let Ok(selector) = Selector::parse(selector_str) else {
            continue;
        };
        for element in document.select(&selector) {
            // Collapse all whitespace runs to single spaces.
            let text = element
                .text()
                .collect::<Vec<_>>()
                .join(" ")
                .split_whitespace()
                .collect::<Vec<_>>()
                .join(" ");
            if text.len() > best.len() {
                best = text;
            }
        }
        // Stop at the most specific selector level that yielded any text.
        if !best.is_empty() {
            break;
        }
    }
    truncate_text(best.trim(), max_chars)
}
1864
+
1865
/// One entry parsed out of an RSS `<item>` or Atom `<entry>` element.
#[derive(Debug)]
struct FeedItem {
    // Cleaned title (CDATA/markup stripped, whitespace collapsed).
    title: String,
    // Item link URL; parsers skip entries whose link is empty.
    link: String,
    // Cleaned summary/description text; may be empty.
    description: String,
}
1871
+
1872
/// Minimal, dependency-free RSS parser: scans for `<item>…</item>` spans and
/// pulls title/link/description from each. Items without a link are skipped;
/// at most `limit` items are returned. Malformed trailing markup simply ends
/// the scan early.
fn parse_rss_items(xml: &str, limit: usize) -> Vec<FeedItem> {
    let mut items = Vec::new();
    let mut rest = xml;

    // "<item" (no '>') also matches openings with attributes, e.g. <item attr=...>.
    while let Some(start_idx) = rest.find("<item") {
        let after_start = &rest[start_idx..];
        let Some(open_end) = after_start.find('>') else {
            break;
        };
        let content_start = start_idx + open_end + 1;
        let Some(close_rel) = rest[content_start..].find("</item>") else {
            break;
        };
        let content_end = content_start + close_rel;
        let chunk = &rest[content_start..content_end];

        let title = extract_xml_tag(chunk, "title").unwrap_or_default();
        let link = extract_xml_tag(chunk, "link").unwrap_or_default();
        // Fall back to content:encoded when <description> is absent.
        let description = extract_xml_tag(chunk, "description")
            .or_else(|| extract_xml_tag(chunk, "content:encoded"))
            .unwrap_or_default();

        if !link.trim().is_empty() {
            items.push(FeedItem {
                title: clean_feed_text(&title),
                link: clean_feed_text(&link),
                description: clean_feed_text(&description),
            });
        }

        // Continue scanning after this item's closing tag.
        rest = &rest[content_end + "</item>".len()..];
        if items.len() >= limit {
            break;
        }
    }

    items
}
1910
+
1911
/// Minimal Atom parser, the counterpart of `parse_rss_items`: scans for
/// `<entry>…</entry>` spans and pulls title, link (`<link href=…>`), and
/// summary/content. Entries without a link are skipped; at most `limit`
/// entries are returned.
fn parse_atom_entries(xml: &str, limit: usize) -> Vec<FeedItem> {
    let mut entries = Vec::new();
    let mut rest = xml;

    // "<entry" (no '>') also matches openings carrying attributes.
    while let Some(start_idx) = rest.find("<entry") {
        let after_start = &rest[start_idx..];
        let Some(open_end) = after_start.find('>') else {
            break;
        };
        let content_start = start_idx + open_end + 1;
        let Some(close_rel) = rest[content_start..].find("</entry>") else {
            break;
        };
        let content_end = content_start + close_rel;
        let chunk = &rest[content_start..content_end];

        let title = extract_xml_tag(chunk, "title").unwrap_or_default();
        // Atom uses <summary> (preferred) or <content> for body text.
        let description = extract_xml_tag(chunk, "summary")
            .or_else(|| extract_xml_tag(chunk, "content"))
            .unwrap_or_default();

        // Atom links are attributes (<link href="…">), not element text.
        let link = extract_atom_link(chunk).unwrap_or_default();

        if !link.trim().is_empty() {
            entries.push(FeedItem {
                title: clean_feed_text(&title),
                link: clean_feed_text(&link),
                description: clean_feed_text(&description),
            });
        }

        rest = &rest[content_end + "</entry>".len()..];
        if entries.len() >= limit {
            break;
        }
    }

    entries
}
1950
+
1951
+ fn extract_atom_link(chunk: &str) -> Option<String> {
1952
+ let mut cursor = chunk;
1953
+ while let Some(idx) = cursor.find("<link") {
1954
+ let tail = &cursor[idx..];
1955
+ let end = tail.find('>')?;
1956
+ let tag = &tail[..=end];
1957
+ if let Some(href) = extract_attr(tag, "href") {
1958
+ if !href.trim().is_empty() {
1959
+ return Some(href);
1960
+ }
1961
+ }
1962
+ cursor = &tail[end + 1..];
1963
+ }
1964
+ None
1965
+ }
1966
+
1967
/// Value of `attr="…"` inside `tag`, or `None` when absent.
/// Only double-quoted attribute values are recognized.
fn extract_attr(tag: &str, attr: &str) -> Option<String> {
    let marker = format!("{}=\"", attr);
    let pos = tag.find(&marker)?;
    let after = &tag[pos + marker.len()..];
    let close = after.find('"')?;
    Some(after[..close].to_string())
}
1973
+
1974
/// Inner text of the first `<tag>…</tag>` pair in `chunk`, or `None` when the
/// pair is absent or unclosed. The opening tag must match exactly — a tag
/// with attributes (`<tag attr=…>`) is not recognized.
fn extract_xml_tag(chunk: &str, tag: &str) -> Option<String> {
    let open = format!("<{}>", tag);
    let close = format!("</{}>", tag);
    let start = chunk.find(&open)? + open.len();
    let end = chunk[start..].find(&close)?;
    Some(chunk[start..start + end].to_string())
}
1987
+
1988
+ fn clean_feed_text(value: &str) -> String {
1989
+ let mut out = value.trim().to_string();
1990
+ out = out.replace("<![CDATA[", "").replace("]]>", "");
1991
+ out = decode_xml_entities(&out);
1992
+ out = strip_html_tags(&out);
1993
+ out.split_whitespace().collect::<Vec<_>>().join(" ")
1994
+ }
1995
+
1996
/// Decode the basic XML/HTML character entities into literal characters.
///
/// `&amp;` is decoded LAST: decoding it first turned doubly-escaped input
/// such as `&amp;lt;` into `&lt;` and then — incorrectly — into `<` (a
/// double decode). With `&amp;` handled last, `&amp;lt;` yields `&lt;`.
fn decode_xml_entities(input: &str) -> String {
    input
        .replace("&lt;", "<")
        .replace("&gt;", ">")
        .replace("&quot;", "\"")
        .replace("&#39;", "'")
        .replace("&apos;", "'")
        .replace("&amp;", "&")
}
2005
+
2006
/// Remove `<…>` tag spans, keeping only the text between tags.
///
/// Note: a bare '>' outside any tag is also dropped (it merely clears the
/// in-tag flag), matching the lenient behavior feed cleanup relies on.
fn strip_html_tags(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut inside_tag = false;
    for ch in input.chars() {
        if ch == '<' {
            inside_tag = true;
        } else if ch == '>' {
            inside_tag = false;
        } else if !inside_tag {
            result.push(ch);
        }
    }
    result
}
2019
+
2020
/// Determine the SearXNG endpoint list, in priority order:
/// 1. a non-empty explicit `endpoint_override` (used alone),
/// 2. env MASIX_DISCOVERY_SEARXNG_URLS (comma-separated list),
/// 3. env MASIX_DISCOVERY_SEARXNG_URL (single, but parsed the same way),
/// 4. the compiled-in DEFAULT_SEARXNG_URLS.
/// All endpoints are normalized by stripping any trailing '/'.
fn resolve_searx_endpoints(endpoint_override: Option<&str>) -> Vec<String> {
    if let Some(single) = endpoint_override.map(str::trim).filter(|s| !s.is_empty()) {
        return vec![single.trim_end_matches('/').to_string()];
    }

    if let Ok(list) = std::env::var("MASIX_DISCOVERY_SEARXNG_URLS") {
        let parsed = parse_endpoint_list(&list);
        if !parsed.is_empty() {
            return parsed;
        }
    }

    if let Ok(single) = std::env::var("MASIX_DISCOVERY_SEARXNG_URL") {
        let parsed = parse_endpoint_list(&single);
        if !parsed.is_empty() {
            return parsed;
        }
    }

    DEFAULT_SEARXNG_URLS
        .iter()
        .map(|v| v.trim_end_matches('/').to_string())
        .collect()
}
2044
+
2045
/// Parse a comma-separated endpoint list: trims each entry, strips trailing
/// '/', keeps only absolute http(s) URLs, removes duplicates, and caps the
/// result at MAX_ENDPOINTS_FROM_CONFIG entries.
fn parse_endpoint_list(raw: &str) -> Vec<String> {
    let mut out = Vec::new();
    let mut seen = HashSet::new();

    for token in raw.split(',') {
        let value = token.trim().trim_end_matches('/').to_string();
        if value.is_empty() {
            continue;
        }
        // Reject anything that is not an absolute http(s) URL.
        if !(value.starts_with("https://") || value.starts_with("http://")) {
            continue;
        }
        if seen.insert(value.clone()) {
            out.push(value);
        }
        if out.len() >= MAX_ENDPOINTS_FROM_CONFIG {
            break;
        }
    }

    out
}
2067
+
2068
/// Compress a response body into a short single-line snippet for error text.
///
/// Whitespace runs are collapsed to single spaces; output longer than 180
/// bytes is cut at the nearest char boundary at or below 180 and suffixed
/// with "...". An empty (or whitespace-only) body yields "(empty body)".
fn summarize_for_error(body: &str) -> String {
    let mut text = body.split_whitespace().collect::<Vec<_>>().join(" ");
    if text.is_empty() {
        return "(empty body)".to_string();
    }
    if text.len() > 180 {
        // String::truncate panics when the cut lands inside a multi-byte
        // character, so back the cut point up to a valid char boundary first.
        let mut cut = 180;
        while !text.is_char_boundary(cut) {
            cut -= 1;
        }
        text.truncate(cut);
        text.push_str("...");
    }
    text
}
2081
+
2082
+ fn normalize_url_key(url: &str) -> String {
2083
+ match reqwest::Url::parse(url) {
2084
+ Ok(mut parsed) => {
2085
+ parsed.set_fragment(None);
2086
+ parsed
2087
+ .as_str()
2088
+ .trim_end_matches('/')
2089
+ .to_lowercase()
2090
+ .replace("http://", "https://")
2091
+ }
2092
+ Err(_) => url.trim().to_lowercase(),
2093
+ }
2094
+ }
2095
+
2096
+ fn source_domain(url: &str) -> String {
2097
+ reqwest::Url::parse(url)
2098
+ .ok()
2099
+ .and_then(|parsed| parsed.host_str().map(|s| s.to_string()))
2100
+ .unwrap_or_else(|| "unknown".to_string())
2101
+ }
2102
+
2103
/// Returns the current time as whole seconds since the Unix epoch, or 0 if
/// the system clock reports a time before the epoch.
fn now_unix_secs() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(elapsed) => elapsed.as_secs(),
        Err(_) => 0,
    }
}
2109
+
2110
/// Fetches `url` and extracts the main readable text from its HTML.
///
/// Tries a list of content-bearing CSS selectors (most specific first),
/// keeping the longest text found under the first selector that matches;
/// the body is capped at `MAX_WEB_CONTENT` and prefixed with the page
/// title and URL.
///
/// # Errors
/// Fails on client build / network errors or a non-success HTTP status.
async fn web_fetch_page(url: &str) -> Result<String> {
    // Browser-like user agent: some sites block obvious bot UAs.
    let client = reqwest::Client::builder()
        .user_agent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36")
        .timeout(Duration::from_secs(DEFAULT_FETCH_TIMEOUT_SECS))
        .build()?;
    let response = client.get(url).send().await?;
    if !response.status().is_success() {
        return Err(anyhow!("fetch failed: HTTP {}", response.status()));
    }
    let html = response.text().await?;
    let document = Html::parse_document(&html);

    // Ordered from most to least specific; "body" is the catch-all fallback.
    let selectors = [
        "article", "main", ".content", "#content", ".post", ".article", "body",
    ];
    let mut text_content = String::new();
    for selector_str in selectors {
        if let Ok(selector) = Selector::parse(selector_str) {
            for element in document.select(&selector) {
                // Collapse all whitespace runs into single spaces.
                let text = element
                    .text()
                    .collect::<String>()
                    .split_whitespace()
                    .collect::<Vec<_>>()
                    .join(" ");
                // Keep the longest candidate among this selector's matches.
                if text.len() > text_content.len() {
                    text_content = text;
                }
            }
            // Stop at the first selector that produced any text at all.
            if !text_content.is_empty() {
                break;
            }
        }
    }

    if text_content.is_empty() {
        text_content = "(no readable text extracted)".to_string();
    }
    // NOTE(review): truncate() is byte-indexed and panics on a non-char
    // boundary; if MAX_WEB_CONTENT can split a multi-byte character this
    // could panic on non-ASCII pages — confirm.
    if text_content.len() > MAX_WEB_CONTENT {
        text_content.truncate(MAX_WEB_CONTENT);
        text_content.push_str("... [truncated]");
    }

    let title = Selector::parse("title")
        .ok()
        .and_then(|sel| document.select(&sel).next())
        .map(|e| e.text().collect::<String>())
        .unwrap_or_else(|| "N/A".to_string());

    Ok(format!(
        "Title: {}\nURL: {}\n\n{}",
        title.trim(),
        url,
        text_content
    ))
}
2166
+
2167
/// Searches several torrent providers (Nyaa, SearXNG broker, archive.org)
/// concurrently and returns a merged, de-duplicated, ranked result list.
///
/// `with_magnets` additionally scrapes magnet links from SearX result pages.
///
/// # Errors
/// Fails only when the merged result set is empty — either because every
/// provider errored or because none returned results.
async fn torrent_search(
    endpoint_override: Option<&str>,
    query: &str,
    max_results: usize,
    with_magnets: bool,
) -> Result<Vec<TorrentSearchResult>> {
    // SearX gets a torrent-biased query; Nyaa/archive take the raw query,
    // falling back to the normalized form when the input is blank.
    let normalized_query = normalize_torrent_query(query);
    let direct_query = query.trim();
    let direct_query = if direct_query.is_empty() {
        normalized_query.as_str()
    } else {
        direct_query
    };
    let endpoints = resolve_searx_endpoints(endpoint_override);

    // Over-fetch (2x) from each provider so de-duplication still leaves
    // enough entries to fill `max_results`.
    let searx_future = searx_search_broker(
        &endpoints,
        &normalized_query,
        (max_results * 2).min(MAX_PROVIDER_RESULTS),
        MAX_SEARX_PARALLEL_ENDPOINTS,
    );
    let nyaa_future = nyaa_torrent_search(direct_query, max_results * 2);
    let archive_future = archive_torrent_search(direct_query, max_results * 2);

    // Run all three providers concurrently; each may fail independently.
    let (searx, nyaa, archive) = tokio::join!(searx_future, nyaa_future, archive_future);

    let mut output = Vec::new();
    let mut provider_errors = Vec::new();

    match nyaa {
        Ok(mut items) => output.append(&mut items),
        Err(e) => provider_errors.push(format!("nyaa: {}", e)),
    }

    match searx {
        Ok(items) => {
            for result in items {
                // Optionally fetch each result page to scrape magnet links;
                // failures degrade to an empty list rather than aborting.
                let magnet_links = if with_magnets {
                    extract_magnet_links(&result.url, 3)
                        .await
                        .unwrap_or_default()
                } else {
                    Vec::new()
                };
                output.push(TorrentSearchResult {
                    title: result.title,
                    url: result.url,
                    content: result.content,
                    engine: if result.engine.trim().is_empty() {
                        "searx".to_string()
                    } else {
                        result.engine
                    },
                    magnet_links,
                });
            }
        }
        Err(e) => provider_errors.push(format!("searx: {}", e)),
    }

    match archive {
        Ok(mut items) => output.append(&mut items),
        Err(e) => provider_errors.push(format!("archive: {}", e)),
    }

    // De-duplicate by normalized URL, rank by provider quality, then clamp
    // the final count to the 1..=20 range.
    output = dedup_torrent_results(output, with_magnets);
    output.truncate(max_results.min(20).max(1));

    if output.is_empty() {
        if provider_errors.is_empty() {
            Err(anyhow!("torrent search returned no results"))
        } else {
            Err(anyhow!(
                "torrent search failed across providers: {}",
                provider_errors.join(" | ")
            ))
        }
    } else {
        Ok(output)
    }
}
2248
+
2249
+ fn dedup_torrent_results(
2250
+ mut results: Vec<TorrentSearchResult>,
2251
+ with_magnets: bool,
2252
+ ) -> Vec<TorrentSearchResult> {
2253
+ let mut seen = HashSet::new();
2254
+ let mut deduped = Vec::with_capacity(results.len());
2255
+
2256
+ for mut item in results.drain(..) {
2257
+ let key = normalize_url_key(&item.url);
2258
+ if key.is_empty() || !seen.insert(key) {
2259
+ continue;
2260
+ }
2261
+ if !with_magnets {
2262
+ item.magnet_links.clear();
2263
+ }
2264
+ deduped.push(item);
2265
+ }
2266
+
2267
+ deduped.sort_by(|a, b| {
2268
+ torrent_engine_rank(&b.engine)
2269
+ .cmp(&torrent_engine_rank(&a.engine))
2270
+ .then_with(|| b.magnet_links.len().cmp(&a.magnet_links.len()))
2271
+ .then_with(|| a.title.cmp(&b.title))
2272
+ });
2273
+
2274
+ deduped
2275
+ }
2276
+
2277
/// Ranks a provider engine name for sorting: nyaa (4) > searx (3) >
/// archive (2) > anything else (1). Matching is case-insensitive substring.
fn torrent_engine_rank(engine: &str) -> u8 {
    let name = engine.to_lowercase();
    // First matching needle wins, mirroring the original if/else priority.
    [("nyaa", 4u8), ("searx", 3), ("archive", 2)]
        .iter()
        .find(|(needle, _)| name.contains(needle))
        .map(|&(_, rank)| rank)
        .unwrap_or(1)
}
2289
+
2290
/// Scrapes the Nyaa search results page for torrent entries.
///
/// Each table row yields a title, a detail URL, any magnet links present in
/// the row, and a pipe-joined summary of the remaining table cells.
///
/// # Errors
/// Fails on client/network errors, a non-success HTTP status, or when no
/// rows could be parsed into results.
async fn nyaa_torrent_search(query: &str, max_results: usize) -> Result<Vec<TorrentSearchResult>> {
    // f=0 (no filter), c=0_0 (all categories).
    let url = format!("https://nyaa.si/?f=0&c=0_0&q={}", url_encode(query));
    let client = reqwest::Client::builder()
        .user_agent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36")
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;
    let response = client.get(&url).send().await?;
    if !response.status().is_success() {
        return Err(anyhow!("Nyaa provider failed: HTTP {}", response.status()));
    }
    let html = response.text().await?;
    let document = Html::parse_document(&html);

    // Static selectors; a parse failure indicates a programming error but is
    // surfaced as Err rather than a panic.
    let row_selector = Selector::parse("table tbody tr")
        .map_err(|e| anyhow!("Invalid Nyaa row selector: {}", e))?;
    let title_selector = Selector::parse("td[colspan=\"2\"] a[href^=\"/view/\"]")
        .map_err(|e| anyhow!("Invalid Nyaa title selector: {}", e))?;
    let magnet_selector = Selector::parse("a[href^=\"magnet:?\"]")
        .map_err(|e| anyhow!("Invalid Nyaa magnet selector: {}", e))?;
    let cell_selector =
        Selector::parse("td").map_err(|e| anyhow!("Invalid Nyaa cell selector: {}", e))?;

    let mut output = Vec::new();
    for row in document.select(&row_selector) {
        if output.len() >= max_results.min(MAX_PROVIDER_RESULTS) {
            break;
        }

        // The first /view/ link in the row supplies title and detail URL.
        let mut title = String::new();
        let mut detail_url = String::new();
        for link in row.select(&title_selector) {
            if let Some(href) = link.value().attr("href") {
                title = link.text().collect::<String>().trim().to_string();
                detail_url = format!("https://nyaa.si{}", href);
                break;
            }
        }
        // Rows without a detail link (e.g. header rows) are skipped.
        if detail_url.is_empty() {
            continue;
        }

        let mut magnets = Vec::new();
        for a in row.select(&magnet_selector) {
            if let Some(href) = a.value().attr("href") {
                let cleaned = sanitize_magnet(href);
                if cleaned.starts_with("magnet:?") {
                    magnets.push(cleaned);
                }
            }
        }

        // Summarize the non-empty cells as one whitespace-normalized,
        // pipe-separated line.
        let mut columns_text = row
            .select(&cell_selector)
            .map(|td| td.text().collect::<String>())
            .collect::<Vec<_>>();
        columns_text.retain(|v| !v.trim().is_empty());
        let summary = columns_text.join(" | ");
        let summary = summary.split_whitespace().collect::<Vec<_>>().join(" ");

        output.push(TorrentSearchResult {
            title: if title.is_empty() {
                "Nyaa result".to_string()
            } else {
                title
            },
            url: detail_url,
            content: if summary.is_empty() {
                "Result from Nyaa index".to_string()
            } else {
                truncate_text(&summary, 700)
            },
            engine: "nyaa-html".to_string(),
            magnet_links: magnets,
        });
    }

    if output.is_empty() {
        return Err(anyhow!("Nyaa provider returned no results"));
    }

    Ok(output)
}
2372
+
2373
/// Queries the archive.org advanced-search JSON API for matching items.
///
/// Results carry no magnet links; each URL points at the archive.org
/// details page for the item identifier.
///
/// # Errors
/// Fails on client/network/JSON errors, a non-success HTTP status, or when
/// no usable docs were returned.
async fn archive_torrent_search(
    query: &str,
    max_results: usize,
) -> Result<Vec<TorrentSearchResult>> {
    let url = format!(
        "https://archive.org/advancedsearch.php?q={}&fl[]=identifier,title,description&rows={}&page=1&output=json",
        url_encode(query),
        max_results.min(MAX_PROVIDER_RESULTS).max(1)
    );
    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} archive", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;
    let response = client.get(&url).send().await?;
    if !response.status().is_success() {
        return Err(anyhow!(
            "Archive provider failed: HTTP {}",
            response.status()
        ));
    }

    let payload: ArchiveResponse = response.json().await?;
    let mut output = Vec::new();
    for doc in payload
        .response
        .docs
        .into_iter()
        .take(max_results.min(MAX_PROVIDER_RESULTS))
    {
        // An item without an identifier cannot form a details URL.
        if doc.identifier.trim().is_empty() {
            continue;
        }

        let title = doc
            .title
            .unwrap_or_else(|| format!("archive.org item {}", doc.identifier));
        // Descriptions may arrive as a string or an array of strings.
        let description = truncate_text(&archive_description_to_string(doc.description), 700);
        let item_url = format!("https://archive.org/details/{}", doc.identifier);

        output.push(TorrentSearchResult {
            title,
            url: item_url,
            content: if description.is_empty() {
                "Result from archive.org".to_string()
            } else {
                description
            },
            engine: "archive-search".to_string(),
            magnet_links: Vec::new(),
        });
    }

    if output.is_empty() {
        return Err(anyhow!("Archive provider returned no results"));
    }

    Ok(output)
}
2431
+
2432
+ fn archive_description_to_string(value: Option<serde_json::Value>) -> String {
2433
+ let Some(value) = value else {
2434
+ return String::new();
2435
+ };
2436
+
2437
+ match value {
2438
+ serde_json::Value::String(v) => v,
2439
+ serde_json::Value::Array(values) => values
2440
+ .into_iter()
2441
+ .filter_map(|v| v.as_str().map(|s| s.to_string()))
2442
+ .collect::<Vec<_>>()
2443
+ .join(" "),
2444
+ _ => String::new(),
2445
+ }
2446
+ }
2447
+
2448
/// Truncates `value` to at most `max_chars` characters (not bytes, so
/// multi-byte text is safe), appending "..." when anything was cut.
fn truncate_text(value: &str, max_chars: usize) -> String {
    if value.chars().count() <= max_chars {
        return value.to_string();
    }
    let mut truncated: String = value.chars().take(max_chars).collect();
    truncated.push_str("...");
    truncated
}
2462
+
2463
/// Biases a search query toward torrent results: blank input becomes
/// "torrent", and the word "torrent" is appended unless already present
/// (case-insensitively).
fn normalize_torrent_query(query: &str) -> String {
    let trimmed = query.trim();
    match trimmed {
        "" => "torrent".to_string(),
        q if q.to_lowercase().contains("torrent") => q.to_string(),
        q => format!("{} torrent", q),
    }
}
2473
+
2474
/// Collects up to `max_links` distinct magnet URIs from a page.
///
/// A `url` that is itself a magnet URI is returned directly. Otherwise the
/// page is fetched and scanned twice: first via anchor `href`s, then — if
/// the quota is not filled — via a raw text scan of the HTML.
///
/// # Errors
/// Only client construction / request errors propagate; a non-success HTTP
/// status yields an empty list instead of an error (best effort).
async fn extract_magnet_links(url: &str, max_links: usize) -> Result<Vec<String>> {
    if url.trim_start().starts_with("magnet:?") {
        return Ok(vec![sanitize_magnet(url)]);
    }

    let client = reqwest::Client::builder()
        .user_agent(format!("MasiXDiscovery/{} torrent", MODULE_VERSION))
        .timeout(Duration::from_secs(DEFAULT_SEARCH_TIMEOUT_SECS))
        .build()?;
    let response = client.get(url).send().await?;
    if !response.status().is_success() {
        return Ok(Vec::new());
    }
    let html = response.text().await.unwrap_or_default();
    let document = Html::parse_document(&html);
    let selector = Selector::parse("a[href^=\"magnet:?\"]").ok();

    // `seen` de-duplicates across both the anchor scan and the text scan.
    let mut seen = HashSet::new();
    let mut output = Vec::new();

    if let Some(sel) = selector {
        for element in document.select(&sel) {
            if let Some(link) = element.value().attr("href") {
                let cleaned = sanitize_magnet(link);
                if cleaned.starts_with("magnet:?")
                    && seen.insert(cleaned.clone())
                    && output.len() < max_links
                {
                    output.push(cleaned);
                }
            }
            if output.len() >= max_links {
                break;
            }
        }
    }

    // Fallback: scan the raw HTML for magnet URIs the DOM pass missed
    // (e.g. links inside scripts or plain text).
    if output.len() < max_links {
        for candidate in extract_magnet_links_from_text(&html) {
            if seen.insert(candidate.clone()) {
                output.push(candidate);
            }
            if output.len() >= max_links {
                break;
            }
        }
    }

    Ok(output)
}
2524
+
2525
/// Scans raw text for magnet URIs ("magnet:?...") and returns them in order
/// of appearance. Each URI ends at the first whitespace, quote, or angle
/// bracket; no de-duplication or count limit is applied here.
fn extract_magnet_links_from_text(text: &str) -> Vec<String> {
    const SCHEME: &str = "magnet:?";
    let mut links = Vec::new();
    let mut cursor = 0usize;

    while let Some(offset) = text.get(cursor..).and_then(|tail| tail.find(SCHEME)) {
        let start = cursor + offset;
        let tail = &text[start..];
        // A link ends at whitespace or an HTML/attribute delimiter.
        let end = tail
            .find(|c: char| c.is_whitespace() || matches!(c, '"' | '\'' | '<' | '>'))
            .unwrap_or(tail.len());
        // Inlined sanitize step: strip wrapping quotes/whitespace and undo
        // HTML ampersand escaping.
        let candidate = tail[..end]
            .trim()
            .trim_matches('"')
            .trim_matches('\'')
            .replace("&amp;", "&");
        if candidate.starts_with(SCHEME) {
            links.push(candidate);
        }
        cursor = start + end;
    }

    links
}
2545
+ }
2546
+
2547
/// Cleans a magnet link scraped from HTML: strips surrounding whitespace
/// and quotes, then undoes HTML escaping of ampersands.
fn sanitize_magnet(value: &str) -> String {
    let stripped = value.trim().trim_matches('"').trim_matches('\'');
    stripped.replace("&amp;", "&")
}
2554
+
2555
/// Percent-encodes `value` for use in a URL query: RFC 3986 unreserved
/// characters pass through, every other byte (including each byte of a
/// multi-byte UTF-8 sequence) becomes %XX.
fn url_encode(value: &str) -> String {
    use std::fmt::Write as _;
    let mut encoded = String::with_capacity(value.len());
    for &byte in value.as_bytes() {
        match byte {
            b'A'..=b'Z' | b'a'..=b'z' | b'0'..=b'9' | b'-' | b'_' | b'.' | b'~' => {
                encoded.push(byte as char);
            }
            _ => {
                let _ = write!(encoded, "%{:02X}", byte);
            }
        }
    }
    encoded
}