arch-ops-server 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
arch_ops_server/aur.py ADDED
@@ -0,0 +1,1132 @@
1
+ """
2
+ AUR (Arch User Repository) interface module.
3
+ Provides search, package info, and PKGBUILD retrieval via AUR RPC v5.
4
+ """
5
+
6
+ import logging
7
+ from typing import Dict, Any, List, Optional
8
+ import httpx
9
+ from datetime import datetime
10
+
11
+ from .utils import (
12
+ create_error_response,
13
+ add_aur_warning,
14
+ get_aur_helper,
15
+ IS_ARCH,
16
+ run_command
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ # AUR API endpoints
22
+ AUR_RPC_URL = "https://aur.archlinux.org/rpc"
23
+ AUR_CGIT_BASE_URL = "https://aur.archlinux.org/cgit/aur.git/plain" # No cloning - direct file access via web
24
+
25
+ # HTTP client settings
26
+ DEFAULT_TIMEOUT = 10.0
27
+ MAX_RESULTS = 50 # AUR RPC limit
28
+
29
+
30
async def search_aur(query: str, limit: int = 20, sort_by: str = "relevance") -> Dict[str, Any]:
    """
    Search AUR packages using RPC v5 interface with smart ranking.

    Args:
        query: Search term sent as the RPC ``arg`` parameter
        limit: Maximum results to return (default: 20, max: 50).
            Negative values are treated as 0.
        sort_by: Sorting method - "relevance", "votes", "popularity", "modified" (default: relevance)

    Returns:
        Dict with AUR packages wrapped by ``add_aur_warning`` on success,
        or the dict produced by ``create_error_response`` on failure
    """
    logger.info(f"Searching AUR for: {query} (sort: {sort_by})")

    # Clamp limit into [0, MAX_RESULTS]. Without the lower bound a negative
    # limit would turn the slice below into "drop the last N results" and
    # could return more than MAX_RESULTS entries.
    limit = max(0, min(limit, MAX_RESULTS))

    params = {
        "v": "5",
        "type": "search",
        "arg": query
    }

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            response = await client.get(AUR_RPC_URL, params=params)
            response.raise_for_status()

            data = response.json()

            # The RPC reports failures in the payload with type == "error"
            if data.get("type") == "error":
                return create_error_response(
                    "AURError",
                    data.get("error", "Unknown AUR error")
                )

            results = data.get("results", [])

            # Apply smart ranking based on sort_by parameter
            sorted_results = _apply_smart_ranking(results, query, sort_by)

            # Limit and format results
            formatted_results = [
                _format_package_info(pkg)
                for pkg in sorted_results[:limit]
            ]

            logger.info(f"Found {len(formatted_results)} AUR packages for '{query}'")

            # Wrap with safety warning
            return add_aur_warning({
                "query": query,
                "count": len(formatted_results),
                "total_found": len(results),
                "sort_by": sort_by,
                "results": formatted_results
            })

    except httpx.TimeoutException:
        logger.error(f"AUR search timed out for query: {query}")
        return create_error_response(
            "TimeoutError",
            f"AUR search timed out for query: {query}",
            "The AUR server did not respond in time. Try again later."
        )
    except httpx.HTTPStatusError as e:
        # Handle rate limiting specifically
        if e.response.status_code == 429:
            logger.error("AUR rate limit exceeded")
            return create_error_response(
                "RateLimitError",
                "AUR rate limit exceeded",
                "Too many requests. Please wait before trying again."
            )
        logger.error(f"AUR search HTTP error: {e}")
        return create_error_response(
            "HTTPError",
            f"AUR search failed with status {e.response.status_code}",
            str(e)
        )
    except Exception as e:
        # Boundary handler: this function reports failures as error dicts
        # instead of raising.
        logger.error(f"AUR search failed: {e}")
        return create_error_response(
            "SearchError",
            f"Failed to search AUR: {str(e)}"
        )
115
+ )
116
+
117
+
118
async def get_aur_info(package_name: str) -> Dict[str, Any]:
    """
    Get detailed information about a specific AUR package.

    Args:
        package_name: Exact package name

    Returns:
        Dict with package details wrapped by ``add_aur_warning`` on success,
        or the dict produced by ``create_error_response`` on failure
        (including when the RPC returns no results for the name)
    """
    logger.info(f"Fetching AUR info for: {package_name}")

    params = {
        "v": "5",
        "type": "info",
        "arg[]": package_name
    }

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            response = await client.get(AUR_RPC_URL, params=params)
            response.raise_for_status()

            data = response.json()

            # The RPC reports failures in the payload with type == "error"
            if data.get("type") == "error":
                return create_error_response(
                    "AURError",
                    data.get("error", "Unknown AUR error")
                )

            results = data.get("results", [])

            if not results:
                return create_error_response(
                    "NotFound",
                    f"AUR package '{package_name}' not found"
                )

            package_info = _format_package_info(results[0], detailed=True)

            logger.info(f"Successfully fetched info for {package_name}")

            # Wrap with safety warning
            return add_aur_warning(package_info)

    except httpx.TimeoutException:
        logger.error(f"AUR info fetch timed out for: {package_name}")
        return create_error_response(
            "TimeoutError",
            f"AUR info fetch timed out for package: {package_name}",
            "The AUR server did not respond in time. Try again later."
        )
    except httpx.HTTPStatusError as e:
        # Report rate limiting the same way search_aur does so callers see
        # one consistent error type for HTTP 429.
        if e.response.status_code == 429:
            logger.error("AUR rate limit exceeded")
            return create_error_response(
                "RateLimitError",
                "AUR rate limit exceeded",
                "Too many requests. Please wait before trying again."
            )
        logger.error(f"AUR info HTTP error: {e}")
        return create_error_response(
            "HTTPError",
            f"AUR info fetch failed with status {e.response.status_code}",
            str(e)
        )
    except Exception as e:
        # Boundary handler: this function reports failures as error dicts
        # instead of raising.
        logger.error(f"AUR info fetch failed: {e}")
        return create_error_response(
            "InfoError",
            f"Failed to get AUR package info: {str(e)}"
        )
183
+
184
+
185
async def get_aur_file(package_name: str, filename: str = "PKGBUILD") -> str:
    """
    Fetch any file from an AUR package via cgit web interface (no cloning required).

    Uses AUR's cgit interface to fetch files directly via HTTP, avoiding the need
    to clone the entire git repository.

    Args:
        package_name: Package name (sent as the cgit ``h`` query parameter)
        filename: File to fetch (default: "PKGBUILD")
            Common files: "PKGBUILD", ".SRCINFO", ".install", "*.patch"

    Returns:
        Raw file content as string

    Raises:
        ValueError: If the request fails, times out, or the retrieved content
            is shorter than 10 characters

    Examples:
        >>> pkgbuild = await get_aur_file("yay", "PKGBUILD")
        >>> srcinfo = await get_aur_file("yay", ".SRCINFO")
    """
    from urllib.parse import quote

    logger.info(f"Fetching {filename} for package: {package_name}")

    # Construct cgit URL for the specific file
    # Format: https://aur.archlinux.org/cgit/aur.git/plain/{filename}?h={package_name}
    # The filename is percent-encoded and the package name is passed via
    # ``params`` so that characters such as '?', '#' or '&' in either value
    # cannot change the structure of the request URL.
    url = f"{AUR_CGIT_BASE_URL}/{quote(filename)}"
    params = {"h": package_name}

    try:
        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            response = await client.get(url, params=params, follow_redirects=True)
            response.raise_for_status()

            content = response.text

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            error_msg = f"{filename} not found for package '{package_name}'"
            logger.error(error_msg)
            raise ValueError(error_msg) from e
        logger.error(f"HTTP error fetching {filename}: {e}")
        raise ValueError(f"Failed to fetch {filename}: HTTP {e.response.status_code}") from e
    except httpx.TimeoutException as e:
        error_msg = f"Timeout fetching {filename} for {package_name}"
        logger.error(error_msg)
        raise ValueError(error_msg) from e
    except Exception as e:
        # Callers only expect ValueError, so any other transport failure is
        # translated while keeping the original cause chained.
        logger.error(f"{filename} fetch failed: {e}")
        raise ValueError(f"Failed to fetch {filename}: {str(e)}") from e

    # Basic validation - ensure we got actual content. Done outside the try
    # block so this ValueError is not caught and re-wrapped by the handlers above.
    if len(content) < 10:
        error_msg = f"Retrieved {filename} appears to be empty or invalid"
        logger.error(error_msg)
        raise ValueError(error_msg)

    logger.info(f"Successfully fetched {filename} for {package_name} ({len(content)} bytes)")

    return content
244
+
245
+
246
async def get_pkgbuild(package_name: str) -> str:
    """
    Retrieve the PKGBUILD of an AUR package without cloning its repository.

    Thin convenience wrapper that delegates to get_aur_file() with the
    filename fixed to "PKGBUILD".

    Args:
        package_name: Package name

    Returns:
        Raw PKGBUILD content as string

    Raises:
        ValueError: If PKGBUILD cannot be retrieved
    """
    pkgbuild_text = await get_aur_file(package_name, filename="PKGBUILD")
    return pkgbuild_text
263
+
264
+
265
def _format_package_info(pkg: Dict[str, Any], detailed: bool = False) -> Dict[str, Any]:
    """
    Map a raw AUR RPC package record onto the module's snake_case structure.

    Args:
        pkg: Raw package data from AUR RPC
        detailed: When True, also include the extended fields
            (dependencies, URLs, license, keywords, ...). Default: False

    Returns:
        Formatted package info dict
    """
    # Core fields, present in every result
    formatted: Dict[str, Any] = {}
    formatted["name"] = pkg.get("Name")
    formatted["version"] = pkg.get("Version")
    formatted["description"] = pkg.get("Description")
    formatted["maintainer"] = pkg.get("Maintainer")
    formatted["votes"] = pkg.get("NumVotes", 0)
    formatted["popularity"] = round(pkg.get("Popularity", 0.0), 2)
    formatted["last_modified"] = _format_timestamp(pkg.get("LastModified"))
    # OutOfDate is exposed as a plain flag: set whenever the raw value is not None
    formatted["out_of_date"] = pkg.get("OutOfDate") is not None

    if not detailed:
        return formatted

    # Extended scalar fields for the detailed view
    formatted["first_submitted"] = _format_timestamp(pkg.get("FirstSubmitted"))
    formatted["url"] = pkg.get("URL")
    formatted["url_path"] = pkg.get("URLPath")
    formatted["package_base"] = pkg.get("PackageBase")

    # Extended list fields; an absent key yields an empty list
    list_fields = (
        ("depends", "Depends"),
        ("makedepends", "MakeDepends"),
        ("optdepends", "OptDepends"),
        ("conflicts", "Conflicts"),
        ("provides", "Provides"),
        ("license", "License"),
        ("keywords", "Keywords"),
    )
    for out_key, raw_key in list_fields:
        formatted[out_key] = pkg.get(raw_key, [])

    return formatted
305
+
306
+
307
def _format_timestamp(timestamp: Optional[int]) -> Optional[str]:
    """
    Convert Unix timestamp to human-readable date.

    The timestamp is interpreted with ``datetime.fromtimestamp`` and therefore
    rendered in the local timezone of the running process.

    Args:
        timestamp: Unix timestamp (seconds since the epoch), or None

    Returns:
        Date string formatted as "YYYY-MM-DD HH:MM:SS", or None when the
        timestamp is None or cannot be converted
    """
    if timestamp is None:
        return None

    try:
        dt = datetime.fromtimestamp(timestamp)
    except (TypeError, ValueError, OverflowError, OSError):
        # Non-numeric or out-of-range value: report "no date" rather than
        # failing the whole package formatting.
        return None

    return dt.strftime("%Y-%m-%d %H:%M:%S")
325
+
326
+
327
def analyze_package_metadata_risk(package_info: Dict[str, Any]) -> Dict[str, Any]:
    """
    Analyze AUR package metadata for security and trustworthiness indicators.

    Evaluates:
    - Package popularity and community trust (votes)
    - Maintainer status (orphaned packages)
    - Update frequency (out-of-date, abandoned packages)
    - Package age and maturity

    Args:
        package_info: Package info dict, either formatted (snake_case keys such
            as "votes") or raw AUR RPC data (keys such as "NumVotes"). Missing
            or None values are treated as "no data".

    Returns:
        Dict with metadata risk analysis including:
        - trust_score: 0-100 (higher = more trustworthy)
        - risk_factors: list of identified risks
        - trust_indicators: list of positive indicators
        - recommendation: trust recommendation
        - summary: the key figures the analysis was based on
    """
    risk_factors: List[Dict[str, Any]] = []
    trust_indicators: List[Dict[str, Any]] = []

    logger.debug(f"Analyzing metadata for package: {package_info.get('name', 'unknown')}")

    # ========================================================================
    # EXTRACT METADATA
    # ========================================================================
    # Formatted keys take precedence over raw RPC keys. None is coalesced so
    # the numeric comparisons below cannot raise TypeError.
    votes = _first_present(package_info, "votes", "NumVotes") or 0
    popularity = _first_present(package_info, "popularity", "Popularity") or 0.0
    maintainer = _first_present(package_info, "maintainer", "Maintainer")
    out_of_date = _first_present(package_info, "out_of_date", "OutOfDate")
    last_modified = _first_present(package_info, "last_modified", "LastModified")
    first_submitted = _first_present(package_info, "first_submitted", "FirstSubmitted")

    is_orphaned = not maintainer or maintainer == "None"
    now = datetime.now()

    # ========================================================================
    # ANALYZE VOTING/POPULARITY
    # ========================================================================
    if votes == 0:
        risk_factors.append({
            "category": "popularity",
            "severity": "HIGH",
            "issue": "Package has zero votes - untested by community"
        })
    elif votes < 5:
        risk_factors.append({
            "category": "popularity",
            "severity": "MEDIUM",
            "issue": f"Low vote count ({votes}) - limited community validation"
        })
    elif votes >= 50:
        trust_indicators.append({
            "category": "popularity",
            "indicator": f"High vote count ({votes}) - well-trusted by community"
        })
    elif votes >= 20:
        trust_indicators.append({
            "category": "popularity",
            "indicator": f"Moderate vote count ({votes}) - some community validation"
        })

    # Popularity scoring
    if popularity < 0.001:
        risk_factors.append({
            "category": "popularity",
            "severity": "MEDIUM",
            "issue": f"Very low popularity score ({popularity:.4f}) - rarely used"
        })
    elif popularity >= 1.0:
        trust_indicators.append({
            "category": "popularity",
            "indicator": f"High popularity score ({popularity:.2f}) - widely used"
        })

    # ========================================================================
    # ANALYZE MAINTAINER STATUS
    # ========================================================================
    if is_orphaned:
        risk_factors.append({
            "category": "maintainer",
            "severity": "CRITICAL",
            "issue": "Package is ORPHANED - no active maintainer"
        })
    else:
        trust_indicators.append({
            "category": "maintainer",
            "indicator": f"Active maintainer: {maintainer}"
        })

    # ========================================================================
    # ANALYZE OUT-OF-DATE STATUS
    # ========================================================================
    if out_of_date:
        flagged_risk = {
            "category": "maintenance",
            "severity": "MEDIUM",
            "issue": "Package is flagged as out-of-date"
        }
        # bool must be tested before int/float because bool is an int subclass
        if isinstance(out_of_date, bool):
            risk_factors.append(flagged_risk)
        elif isinstance(out_of_date, (int, float)):
            # It's a timestamp: severity depends on how long the flag has been set
            try:
                ood_date = datetime.fromtimestamp(out_of_date)
                ood_days = (now - ood_date).days
                risk_factors.append({
                    "category": "maintenance",
                    "severity": "MEDIUM" if ood_days < 90 else "HIGH",
                    "issue": f"Out-of-date for {ood_days} days since {ood_date.strftime('%Y-%m-%d')}"
                })
            except (ValueError, OverflowError, OSError):
                risk_factors.append(flagged_risk)
        else:
            # Any other truthy value still means "flagged"; record it so the
            # risk list agrees with summary["is_out_of_date"].
            risk_factors.append(flagged_risk)

    # ========================================================================
    # ANALYZE LAST MODIFICATION TIME
    # ========================================================================
    if last_modified:
        try:
            days_since_update = (now - _parse_aur_date(last_modified)).days

            if days_since_update > 730:  # 2 years
                risk_factors.append({
                    "category": "maintenance",
                    "severity": "HIGH",
                    "issue": f"Not updated in {days_since_update} days (~{days_since_update//365} years) - possibly abandoned"
                })
            elif days_since_update > 365:  # 1 year
                risk_factors.append({
                    "category": "maintenance",
                    "severity": "MEDIUM",
                    "issue": f"Not updated in {days_since_update} days (~{days_since_update//365} year) - low activity"
                })
            elif days_since_update <= 30:
                trust_indicators.append({
                    "category": "maintenance",
                    "indicator": f"Recently updated ({days_since_update} days ago) - actively maintained"
                })
        except (ValueError, TypeError, IndexError, OverflowError, OSError) as e:
            # Best effort: an unparseable date simply contributes nothing
            logger.debug(f"Failed to parse last_modified: {e}")

    # ========================================================================
    # ANALYZE PACKAGE AGE
    # ========================================================================
    if first_submitted:
        try:
            package_age_days = (now - _parse_aur_date(first_submitted)).days

            if package_age_days < 7:
                risk_factors.append({
                    "category": "age",
                    "severity": "HIGH",
                    "issue": f"Very new package ({package_age_days} days old) - needs community review time"
                })
            elif package_age_days < 30:
                risk_factors.append({
                    "category": "age",
                    "severity": "MEDIUM",
                    "issue": f"New package ({package_age_days} days old) - limited track record"
                })
            elif package_age_days >= 365:
                trust_indicators.append({
                    "category": "age",
                    "indicator": f"Mature package ({package_age_days//365}+ years old) - established track record"
                })
        except (ValueError, TypeError, IndexError, OverflowError, OSError) as e:
            # Best effort: an unparseable date simply contributes nothing
            logger.debug(f"Failed to parse first_submitted: {e}")

    # ========================================================================
    # CALCULATE TRUST SCORE
    # ========================================================================
    # Start with base score of 50
    trust_score = 50

    # Adjust based on votes (max +30)
    if votes >= 100:
        trust_score += 30
    elif votes >= 50:
        trust_score += 20
    elif votes >= 20:
        trust_score += 10
    elif votes >= 5:
        trust_score += 5
    elif votes == 0:
        trust_score -= 20

    # Adjust based on popularity (max +10)
    if popularity >= 5.0:
        trust_score += 10
    elif popularity >= 1.0:
        trust_score += 5
    elif popularity < 0.001:
        trust_score -= 10

    # Penalties for risk factors (applied in addition to the adjustments above)
    severity_penalty = {"CRITICAL": 30, "HIGH": 15, "MEDIUM": 10}
    for risk in risk_factors:
        trust_score -= severity_penalty.get(risk["severity"], 0)

    # Clamp between 0 and 100
    trust_score = max(0, min(100, trust_score))

    # ========================================================================
    # GENERATE RECOMMENDATION
    # ========================================================================
    if trust_score >= 70:
        recommendation = "✅ TRUSTED - Package has good community validation and maintenance"
    elif trust_score >= 50:
        recommendation = "⚠️ MODERATE TRUST - Package is acceptable but verify PKGBUILD carefully"
    elif trust_score >= 30:
        recommendation = "⚠️ LOW TRUST - Package has significant risk factors, extra caution needed"
    else:
        recommendation = "❌ UNTRUSTED - Package has critical trust issues, avoid unless necessary"

    logger.info(f"Package metadata analysis: trust_score={trust_score}, "
                f"{len(risk_factors)} risk factors, {len(trust_indicators)} trust indicators")

    return {
        "trust_score": trust_score,
        "risk_factors": risk_factors,
        "trust_indicators": trust_indicators,
        "recommendation": recommendation,
        "summary": {
            "votes": votes,
            "popularity": round(popularity, 4),
            "is_orphaned": is_orphaned,
            "is_out_of_date": bool(out_of_date),
            "total_risk_factors": len(risk_factors),
            "total_trust_indicators": len(trust_indicators)
        }
    }


def _first_present(mapping: Dict[str, Any], *keys: str) -> Any:
    """Return the first value under ``keys`` that is not None, or None if there is none."""
    for key in keys:
        value = mapping.get(key)
        if value is not None:
            return value
    return None


def _parse_aur_date(value: Any) -> datetime:
    """Turn a "YYYY-MM-DD ..." string or a Unix timestamp into a naive local datetime."""
    if isinstance(value, str):
        # Only the date part is used; a trailing time component is ignored
        return datetime.strptime(value.split()[0], "%Y-%m-%d")
    return datetime.fromtimestamp(value)
582
+
583
+
584
def _apply_smart_ranking(
    packages: List[Dict[str, Any]],
    query: str,
    sort_by: str
) -> List[Dict[str, Any]]:
    """
    Apply smart ranking to AUR search results.

    Sorting methods (matched case-insensitively, surrounding whitespace ignored):
    - relevance: Name match priority, then by votes and popularity
    - votes: Sort by number of votes (highest first)
    - popularity: Sort by AUR popularity metric (highest first)
    - modified: Sort by last modification timestamp (most recent first)

    An unrecognised method logs a warning and falls back to relevance.
    Missing or None numeric fields are treated as 0.

    Args:
        packages: List of package dicts from AUR RPC
        query: Original search query for relevance scoring
        sort_by: Sorting method

    Returns:
        Sorted list of packages (the input itself when it is empty)
    """
    if not packages:
        return packages

    method = sort_by.strip().lower() if isinstance(sort_by, str) else sort_by

    # Plain descending sorts on a single raw RPC field
    field_by_method = {
        "votes": "NumVotes",
        "popularity": "Popularity",
        "modified": "LastModified",
    }
    if method in field_by_method:
        field = field_by_method[method]
        # "or 0" keeps None values comparable with numbers
        return sorted(packages, key=lambda p: p.get(field) or 0, reverse=True)

    if method != "relevance":
        # Default to relevance if unknown sort method
        logger.warning(f"Unknown sort method: {sort_by}, using relevance")

    query_lower = query.lower()

    def relevance_score(pkg: Dict[str, Any]) -> tuple:
        name = (pkg.get("Name") or "").lower()
        votes = pkg.get("NumVotes") or 0
        popularity = pkg.get("Popularity") or 0.0

        # Scoring priority (negative values sort first):
        # 1. Exact name match (highest priority)
        # 2. Name starts with query
        # 3. Name contains query
        # 4. Then by votes and popularity
        exact_match = -1 if name == query_lower else 0
        starts_with = -1 if name.startswith(query_lower) else 0
        contains = -1 if query_lower in name else 0

        return (exact_match, starts_with, contains, -votes, -popularity)

    return sorted(packages, key=relevance_score)
644
+
645
+
646
async def install_package_secure(package_name: str) -> Dict[str, Any]:
    """
    Install a package with comprehensive security checks.

    Workflow:
    1. Check if package exists in official repos first (safer) and, if so,
       install it with ``sudo pacman -S --noconfirm``
    2. Otherwise look the package up in the AUR
    3. Analyze the AUR metadata for trust indicators
    4. Fetch and analyze the PKGBUILD; abort if the analysis marks it unsafe
    5. Locate an AUR helper and install with ``--noconfirm``

    Args:
        package_name: Package name to install. Must be non-empty, contain no
            whitespace and must not start with "-".

    Returns:
        Dict with installation status, collected messages and security
        analysis, or the dict produced by ``create_error_response`` when the
        workflow cannot run (unsupported system, invalid name, package not
        found, PKGBUILD fetch failure, pacman invocation error)
    """
    logger.info(f"Starting secure installation workflow for: {package_name}")

    # Only supported on Arch Linux
    if not IS_ARCH:
        return create_error_response(
            "NotSupported",
            "Package installation is only supported on Arch Linux systems",
            "This server is not running on Arch Linux"
        )

    # The name ends up as an argv element of `sudo pacman` / the AUR helper.
    # A value starting with "-" would be parsed as an option rather than a
    # target, so reject such input before doing anything else.
    if (
        not package_name
        or package_name.startswith("-")
        or any(ch.isspace() for ch in package_name)
    ):
        logger.error(f"Rejected invalid package name: {package_name!r}")
        return create_error_response(
            "InvalidPackageName",
            f"Invalid package name: {package_name!r}",
            "Package names must be non-empty, contain no whitespace and must not start with '-'"
        )

    result = {
        "package": package_name,
        "installed": False,
        "security_checks": {},
        "messages": []
    }

    # ========================================================================
    # STEP 1: Check if package is in official repos first
    # ========================================================================
    logger.info(f"[STEP 1/5] Checking if '{package_name}' is in official repos...")
    result["messages"].append("🔍 Checking official repositories first...")

    from .pacman import get_official_package_info
    official_pkg = await get_official_package_info(package_name)

    # If found in official repos, install directly with pacman
    if not official_pkg.get("error"):
        logger.info(f"Package '{package_name}' found in official repos - installing via pacman")
        result["messages"].append(f"✅ Package found in official repository: {official_pkg.get('repository', 'unknown')}")
        result["is_official"] = True
        result["security_checks"]["source"] = "official_repository"
        result["security_checks"]["risk_level"] = "LOW"
        result["security_checks"]["recommendation"] = "✅ SAFE - Official repository package"

        # Install using sudo pacman -S --noconfirm
        try:
            result["messages"].append("📦 Installing from official repository...")
            exit_code, stdout, stderr = await run_command(
                ["sudo", "pacman", "-S", "--noconfirm", package_name],
                timeout=300,  # 5 minutes for installation
                check=False
            )

            if exit_code == 0:
                result["installed"] = True
                result["messages"].append(f"✅ Successfully installed {package_name} from official repository")
                logger.info(f"Successfully installed official package: {package_name}")
            else:
                result["messages"].append(f"❌ Installation failed: {stderr}")
                logger.error(f"pacman installation failed: {stderr}")

            result["install_output"] = stdout
            result["install_errors"] = stderr

            return result

        except Exception as e:
            logger.error(f"Installation failed: {e}")
            return create_error_response(
                "InstallError",
                f"Failed to install official package: {str(e)}"
            )

    # ========================================================================
    # STEP 2: Package is not official - look it up in the AUR
    # ========================================================================
    logger.info("[STEP 2/5] Package not in official repos - checking AUR...")
    result["messages"].append("⚠️ Package not in official repos - checking AUR...")
    result["is_official"] = False

    # Search AUR for package
    aur_info = await get_aur_info(package_name)

    # NOTE(review): every error from get_aur_info (including timeouts and HTTP
    # failures) is reported as NotFound here - confirm that is intended.
    if aur_info.get("error"):
        return create_error_response(
            "NotFound",
            f"Package '{package_name}' not found in official repos or AUR"
        )

    # Extract actual package data (may be wrapped in warning)
    pkg_data = aur_info.get("data", aur_info)
    result["messages"].append(f"📦 Found in AUR: {pkg_data.get('name')} v{pkg_data.get('version')}")

    # ========================================================================
    # STEP 3: Analyze package metadata for trust
    # ========================================================================
    logger.info("[STEP 3/5] Analyzing package metadata for trust indicators...")
    result["messages"].append("🔍 Analyzing package metadata (votes, maintainer, age)...")

    # The trust score is recorded and reported; it does not gate installation.
    metadata_analysis = analyze_package_metadata_risk(pkg_data)
    result["security_checks"]["metadata_analysis"] = metadata_analysis
    result["messages"].append(f"📊 Trust Score: {metadata_analysis['trust_score']}/100")
    result["messages"].append(f" {metadata_analysis['recommendation']}")

    # ========================================================================
    # STEP 4: Fetch and analyze PKGBUILD
    # ========================================================================
    logger.info("[STEP 4/5] Fetching and analyzing PKGBUILD for security issues...")
    result["messages"].append("🔍 Fetching PKGBUILD for security analysis...")

    try:
        pkgbuild_content = await get_pkgbuild(package_name)
        result["messages"].append(f"✅ PKGBUILD fetched ({len(pkgbuild_content)} bytes)")

        # Analyze PKGBUILD for security issues
        result["messages"].append("🛡️ Analyzing PKGBUILD for security threats...")
        pkgbuild_analysis = analyze_pkgbuild_safety(pkgbuild_content)
        result["security_checks"]["pkgbuild_analysis"] = pkgbuild_analysis
        result["messages"].append(f"🛡️ Risk Score: {pkgbuild_analysis['risk_score']}/100")
        result["messages"].append(f" {pkgbuild_analysis['recommendation']}")

        # Log findings
        if pkgbuild_analysis["red_flags"]:
            result["messages"].append(f" 🚨 {len(pkgbuild_analysis['red_flags'])} CRITICAL issues found!")
            for flag in pkgbuild_analysis["red_flags"][:3]:  # Show first 3
                result["messages"].append(f" - Line {flag['line']}: {flag['issue']}")

        if pkgbuild_analysis["warnings"]:
            result["messages"].append(f" ⚠️ {len(pkgbuild_analysis['warnings'])} warnings found")

        # Check if package is safe to install
        if not pkgbuild_analysis["safe"]:
            result["messages"].append("❌ INSTALLATION BLOCKED - Security analysis failed")
            result["messages"].append(" Package has critical security issues and will NOT be installed")
            result["security_checks"]["decision"] = "BLOCKED"
            result["security_checks"]["reason"] = "Critical security issues detected in PKGBUILD"
            logger.warning(f"Installation blocked for {package_name} due to security issues")
            return result

        # Additional check for high-risk warnings
        # NOTE(review): this only records a recommendation; the workflow still
        # continues to installation and "decision" is overwritten below.
        if len(pkgbuild_analysis["warnings"]) >= 5:
            result["messages"].append("⚠️ HIGH RISK - Multiple suspicious patterns detected")
            result["messages"].append(" Manual review recommended before installation")
            result["security_checks"]["decision"] = "REVIEW_RECOMMENDED"

    except ValueError as e:
        # get_pkgbuild signals every retrieval failure as ValueError
        logger.error(f"Failed to fetch PKGBUILD: {e}")
        return create_error_response(
            "FetchError",
            f"Failed to fetch PKGBUILD for security analysis: {str(e)}"
        )

    # ========================================================================
    # STEP 5: Check for AUR helper and install
    # ========================================================================
    logger.info("[STEP 5/5] Checking for AUR helper (paru/yay)...")
    result["messages"].append("🔧 Checking for AUR helper...")

    aur_helper = get_aur_helper()

    if not aur_helper:
        result["messages"].append("❌ No AUR helper found (paru or yay)")
        result["messages"].append(" Please install an AUR helper:")
        result["messages"].append(" - Recommended: paru (pacman -S paru)")
        result["messages"].append(" - Alternative: yay")
        result["security_checks"]["decision"] = "NO_HELPER"
        return result

    result["messages"].append(f"✅ Using AUR helper: {aur_helper}")
    result["aur_helper"] = aur_helper

    result["messages"].append(f"📦 Installing {package_name} via {aur_helper} (no confirmation)...")
    logger.info(f"Installing AUR package {package_name} with {aur_helper}")

    try:
        # Install with --noconfirm flag
        exit_code, stdout, stderr = await run_command(
            [aur_helper, "-S", "--noconfirm", package_name],
            timeout=600,  # 10 minutes for AUR package build
            check=False
        )

        if exit_code == 0:
            result["installed"] = True
            result["messages"].append(f"✅ Successfully installed {package_name} from AUR")
            result["security_checks"]["decision"] = "INSTALLED"
            logger.info(f"Successfully installed AUR package: {package_name}")
        else:
            result["messages"].append(f"❌ Installation failed with exit code {exit_code}")
            result["messages"].append(f" Error: {stderr}")
            result["security_checks"]["decision"] = "INSTALL_FAILED"
            logger.error(f"AUR installation failed for {package_name}: {stderr}")

        result["install_output"] = stdout
        result["install_errors"] = stderr

    except Exception as e:
        # Unlike the pacman path, a helper failure keeps the collected
        # security analysis and is reported inside the result dict.
        logger.error(f"Installation failed: {e}")
        result["messages"].append(f"❌ Installation exception: {str(e)}")
        result["security_checks"]["decision"] = "INSTALL_ERROR"

    return result
859
+
860
+
861
def analyze_pkgbuild_safety(pkgbuild_content: str) -> Dict[str, Any]:
    """
    Perform a heuristic, regex-based safety analysis on PKGBUILD content.

    The scan is purely textual: every non-blank, non-comment line is matched
    (case-insensitively) against three pattern tiers, then the ``source=(...)``
    arrays and binary file extensions are inspected on the whole text.

    Checks for:
    - Dangerous commands (rm -rf /, dd, fork bombs, etc.)
    - Obfuscated code (base64, eval, encoding tricks)
    - Network activity (reverse shells, data exfiltration)
    - Binary downloads and execution
    - Privilege escalation attempts
    - Cryptocurrency mining patterns
    - Source URL validation
    - Suspicious file operations

    Args:
        pkgbuild_content: Raw PKGBUILD text

    Returns:
        Dict with detailed safety analysis results including:
        - safe: False when any red flag exists or when 5+ warnings exist
        - red_flags: critical findings (line, content, issue, severity)
        - warnings: suspicious findings; whole-file findings use line 0
        - info: informational findings
        - risk_score: 0-100 (50 per red flag + 5 per warning, capped)
        - suspicious_domains: sorted, de-duplicated hosts with a flagged TLD
        - recommendation: human-readable action recommendation
        - summary: counts of each finding tier and of lines analyzed
    """
    import re
    from urllib.parse import urlparse

    red_flags: List[Dict[str, Any]] = []  # Critical security issues
    warnings: List[Dict[str, Any]] = []   # Suspicious but not necessarily malicious
    info: List[Dict[str, Any]] = []       # Informational notices

    lines = pkgbuild_content.split('\n')
    logger.debug("Analyzing PKGBUILD with %d lines", len(lines))

    def _compile(table):
        """Compile a (pattern, message) table once, case-insensitively."""
        return [(re.compile(pattern, re.IGNORECASE), message) for pattern, message in table]

    def _finding(line_no: int, content: str, issue: str, severity: str) -> Dict[str, Any]:
        """Build one finding record in the shape shared by all tiers."""
        return {
            "line": line_no,
            "content": content,
            "issue": issue,
            "severity": severity,
        }

    # ========================================================================
    # CRITICAL PATTERNS - Definitely malicious
    # ========================================================================
    dangerous_patterns = _compile([
        # Destructive commands
        # The trailing alternation also accepts end-of-line: lines are split on
        # '\n', so a bare "rm -rf /" has no character after the slash.
        (r"rm\s+-rf\s+/(?:[^a-zA-Z]|$)", "CRITICAL: rm -rf / or /something detected - system destruction"),
        (r"\bdd\b.*if=/dev/(zero|random|urandom).*of=/dev/sd", "CRITICAL: dd overwriting disk detected"),
        (r":\(\)\{.*:\|:.*\}", "CRITICAL: Fork bomb detected"),
        (r"\bmkfs\.", "CRITICAL: Filesystem formatting detected"),
        (r"fdisk.*-w", "CRITICAL: Partition table modification detected"),

        # Reverse shells and backdoors
        (r"/dev/tcp/\d+\.\d+\.\d+\.\d+/\d+", "CRITICAL: Reverse shell via /dev/tcp detected"),
        (r"nc\s+-[^-]*e\s+/bin/(ba)?sh", "CRITICAL: Netcat reverse shell detected"),
        (r"bash\s+-i\s+>&\s+/dev/tcp/", "CRITICAL: Interactive reverse shell detected"),
        (r"python.*socket.*connect", "CRITICAL: Python socket connection (potential backdoor)"),
        (r"perl.*socket.*connect", "CRITICAL: Perl socket connection (potential backdoor)"),

        # Malicious downloads and execution
        (r"curl[^|]*\|\s*(ba)?sh", "CRITICAL: Piping curl to shell (remote code execution)"),
        (r"wget[^|]*\|\s*(ba)?sh", "CRITICAL: Piping wget to shell (remote code execution)"),
        (r"curl.*-o.*&&.*chmod\s+\+x.*&&\s*\./", "CRITICAL: Download, make executable, and run pattern"),

        # Crypto mining patterns
        (r"xmrig|minerd|cpuminer|ccminer", "CRITICAL: Cryptocurrency miner detected"),
        (r"stratum\+tcp://", "CRITICAL: Mining pool connection detected"),
        (r"--donate-level", "CRITICAL: XMRig miner option detected"),

        # Rootkit/malware installation
        (r"chattr\s+\+i", "CRITICAL: Making files immutable (rootkit technique)"),
        (r"/etc/ld\.so\.preload", "CRITICAL: LD_PRELOAD manipulation (rootkit technique)"),
        (r"HISTFILE=/dev/null", "CRITICAL: History clearing (covering tracks)"),
    ])

    # ========================================================================
    # SUSPICIOUS PATTERNS - Require careful review
    # ========================================================================
    suspicious_patterns = _compile([
        # Obfuscation techniques
        (r"base64\s+-d", "Obfuscation: base64 decoding detected"),
        (r"xxd\s+-r", "Obfuscation: hex decoding detected"),
        (r"\beval\b", "Obfuscation: eval usage (can execute arbitrary code)"),
        (r"\$\(.*base64.*\)", "Obfuscation: base64 in command substitution"),
        (r"openssl\s+enc\s+-d", "Obfuscation: encrypted content decoding"),
        (r"echo.*\|.*sh", "Obfuscation: piping echo to shell"),
        (r"printf.*\|.*sh", "Obfuscation: piping printf to shell"),

        # Suspicious permissions and ownership
        (r"chmod\s+[0-7]*7[0-7]*7", "Dangerous: world-writable permissions"),
        (r"chown\s+root", "Suspicious: changing ownership to root"),
        (r"chmod\s+[u+]*s", "Suspicious: setuid/setgid (privilege escalation risk)"),

        # Suspicious file operations
        (r"mktemp.*&&.*chmod", "Suspicious: temp file creation with permission change"),
        (r">/dev/null\s+2>&1", "Suspicious: suppressing all output (hiding activity)"),
        (r"nohup.*&", "Suspicious: background process that persists"),

        # Network activity
        (r"curl.*-s.*-o", "Network: silent download detected"),
        (r"wget.*-q.*-O", "Network: quiet download detected"),
        (r"nc\s+-l", "Network: netcat listening mode (potential backdoor)"),
        (r"socat", "Network: socat usage (advanced networking tool)"),
        (r"ssh.*-R\s+\d+:", "Network: SSH reverse tunnel detected"),

        # Data exfiltration
        (r"curl.*-X\s+POST.*--data", "Data exfiltration: HTTP POST with data"),
        (r"tar.*\|.*ssh", "Data exfiltration: tar over SSH"),
        (r"scp.*-r.*\*", "Data exfiltration: recursive SCP"),

        # Systemd/init manipulation
        (r"systemctl.*enable.*\.service", "System: enabling systemd service"),
        (r"/etc/systemd/system/", "System: systemd unit file modification"),
        (r"update-rc\.d", "System: SysV init modification"),
        (r"@reboot", "System: cron job at reboot"),

        # Kernel module manipulation
        (r"modprobe", "System: kernel module loading"),
        (r"insmod", "System: kernel module insertion"),
        (r"/lib/modules/", "System: kernel module directory access"),

        # Compiler/build chain manipulation
        (r"gcc.*-fPIC.*-shared", "Build: creating shared library (could be malicious)"),
        (r"LD_PRELOAD=", "Build: LD_PRELOAD manipulation (function hijacking)"),
    ])

    # ========================================================================
    # INFORMATIONAL PATTERNS - Good to know but not necessarily bad
    # ========================================================================
    info_patterns = _compile([
        (r"sudo\s+", "Info: sudo usage detected"),
        (r"git\s+clone", "Info: git clone detected"),
        (r"make\s+install", "Info: make install detected"),
        (r"pip\s+install", "Info: pip install detected"),
        (r"npm\s+install", "Info: npm install detected"),
        (r"cargo\s+install", "Info: cargo install detected"),
    ])

    # ========================================================================
    # SCAN PATTERNS LINE BY LINE
    # ========================================================================
    # Each tier: (compiled patterns, destination list, severity label,
    # optional (log function, log prefix)). Info findings are not logged.
    tiers = (
        (dangerous_patterns, red_flags, "CRITICAL", (logger.warning, "Red flag")),
        (suspicious_patterns, warnings, "WARNING", (logger.info, "Warning")),
        (info_patterns, info, "INFO", None),
    )

    for line_no, line in enumerate(lines, 1):
        # Skip comments and empty lines for pattern matching
        stripped_line = line.strip()
        if not stripped_line or stripped_line.startswith('#'):
            continue

        excerpt = stripped_line[:100]  # Limit length for output
        for patterns, bucket, severity, log_spec in tiers:
            for regex, message in patterns:
                if not regex.search(line):
                    continue
                if log_spec is not None:
                    log_fn, prefix = log_spec
                    log_fn("%s found at line %d: %s", prefix, line_no, message)
                bucket.append(_finding(line_no, excerpt, message, severity))

    # ========================================================================
    # ANALYZE SOURCE URLs
    # ========================================================================
    source_blocks = re.findall(r'source=\([^)]+\)|source_\w+=\([^)]+\)', pkgbuild_content, re.MULTILINE)
    suspicious_domains = []

    # Known suspicious TLDs and patterns
    suspicious_tlds = ('.tk', '.ml', '.ga', '.cf', '.gq', '.cn', '.ru')
    suspicious_url_patterns = _compile([
        (r'bit\.ly|tinyurl|shorturl', "URL shortener (hides true destination)"),
        (r'pastebin|hastebin|paste\.ee', "Paste site (common for malware hosting)"),
        (r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', "Raw IP address (suspicious)"),
    ])

    for source_block in source_blocks:
        # Extract URLs from source array. ')' is excluded so the array's
        # closing parenthesis is never glued onto the last URL.
        for url in re.findall(r'https?://[^\s\'")]+', source_block):
            try:
                # hostname (not netloc) drops userinfo and ":port", either of
                # which would defeat the endswith() TLD comparison below.
                domain = (urlparse(url).hostname or "").lower()
            except ValueError as e:
                logger.debug("Failed to parse URL %s: %s", url, e)
                continue

            # Check for suspicious TLDs
            if domain.endswith(suspicious_tlds):
                warnings.append(_finding(0, url, f"Suspicious domain TLD: {domain}", "WARNING"))
                suspicious_domains.append(domain)

            # Check for suspicious URL patterns
            for regex, message in suspicious_url_patterns:
                if regex.search(url):
                    warnings.append(_finding(0, url, message, "WARNING"))

    # ========================================================================
    # DETECT BINARY DOWNLOADS
    # ========================================================================
    binary_extensions = ['.bin', '.exe', '.AppImage', '.deb', '.rpm', '.jar', '.apk']
    lowered_content = pkgbuild_content.lower()
    for ext in binary_extensions:
        # Compare lower-case to lower-case; '.AppImage' would otherwise never
        # match the lowered text.
        if ext.lower() in lowered_content:
            warnings.append(_finding(0, "", f"Binary file type detected: {ext}", "WARNING"))

    # ========================================================================
    # CALCULATE RISK SCORE
    # ========================================================================
    # Risk scoring: red_flags = 50 points each, warnings = 5 points each, cap at 100
    risk_score = min(100, (len(red_flags) * 50) + (len(warnings) * 5))

    # ========================================================================
    # GENERATE RECOMMENDATION
    # ========================================================================
    if red_flags:
        recommendation = "❌ DANGEROUS - Critical security issues detected. DO NOT INSTALL."
        safe = False
    elif len(warnings) >= 5:
        recommendation = "⚠️ HIGH RISK - Multiple suspicious patterns detected. Review carefully before installing."
        safe = False
    elif warnings:
        recommendation = "⚠️ CAUTION - Some suspicious patterns detected. Manual review recommended."
        safe = True  # Technically safe but needs review
    else:
        recommendation = "✅ SAFE - No critical issues detected. Standard review still recommended."
        safe = True

    logger.info(
        "PKGBUILD analysis complete: %d red flags, %d warnings, risk score: %d",
        len(red_flags), len(warnings), risk_score,
    )

    return {
        "safe": safe,
        "red_flags": red_flags,
        "warnings": warnings,
        "info": info,
        "risk_score": risk_score,
        # Sorted so the output order is deterministic across runs.
        "suspicious_domains": sorted(set(suspicious_domains)),
        "recommendation": recommendation,
        "summary": {
            "total_red_flags": len(red_flags),
            "total_warnings": len(warnings),
            "total_info": len(info),
            "lines_analyzed": len(lines),
        },
    }
1132
+