exarch-rs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/lib.rs ADDED
@@ -0,0 +1,428 @@
1
+ //! Node.js bindings for exarch-core.
2
+ //!
3
+ //! This crate provides a JavaScript/TypeScript API for secure archive
4
+ //! extraction with built-in protection against path traversal, zip bombs,
5
+ //! symlink attacks, and other common vulnerabilities.
6
+ //!
7
+ //! # Installation
8
+ //!
9
+ //! ```bash
10
+ //! npm install @exarch/node
11
+ //! ```
12
+ //!
13
+ //! # Quick Start
14
+ //!
15
+ //! ```javascript
16
+ //! const { extractArchive, SecurityConfig } = require('@exarch/node');
17
+ //!
18
+ //! // Use secure defaults
19
+ //! const report = await extractArchive('archive.tar.gz', '/tmp/output');
20
+ //! console.log(`Extracted ${report.filesExtracted} files`);
21
+ //!
22
+ //! // Customize security settings
23
+ //! const config = new SecurityConfig()
24
+ //! .maxFileSize(100 * 1024 * 1024)
25
+ //! .allowSymlinks(true);
26
+ //! const customReport = await extractArchive('archive.tar.gz', '/tmp/output', config);
27
+ //! ```
28
+ //!
29
+ //! # Security
30
+ //!
31
+ //! This library uses a secure-by-default approach. All potentially dangerous
32
+ //! features are disabled by default and must be explicitly enabled. See
33
+ //! `SecurityConfig` for configuration options.
34
+ //!
35
+ //! # Repository
36
+ //!
37
+ //! <https://github.com/rabax/exarch>
38
+ //!
39
+ //! # License
40
+ //!
41
+ //! MIT OR Apache-2.0
42
+
43
+ use napi::bindgen_prelude::*;
44
+ use napi_derive::napi;
45
+
46
+ mod config;
47
+ mod error;
48
+ mod report;
49
+ mod utils;
50
+
51
+ use config::SecurityConfig;
52
+ use error::convert_error;
53
+ use report::ExtractionReport;
54
+ use utils::validate_path;
55
+
56
+ /// Extract an archive to the specified directory (async).
57
+ ///
58
+ /// This function provides secure archive extraction with configurable
59
+ /// security policies. By default, it uses a restrictive security
60
+ /// configuration that blocks symlinks, hardlinks, absolute paths, and
61
+ /// enforces resource quotas.
62
+ ///
63
+ /// # Security Considerations
64
+ ///
65
+ /// ## Thread Safety and TOCTOU
66
+ ///
67
+ /// The extraction runs on a background worker thread (tokio's blocking thread pool). This creates
68
+ /// a Time-Of-Check-Time-Of-Use (TOCTOU) race condition where the archive
69
+ /// file could be modified between validation and extraction. This is an
70
+ /// accepted tradeoff for async performance. For untrusted archives, ensure
71
+ /// exclusive access to the archive file during extraction.
72
+ ///
73
+ /// ## Input Validation
74
+ ///
75
+ /// - Paths containing null bytes are rejected (security)
76
+ /// - Paths exceeding 4096 bytes are rejected (`DoS` prevention)
77
+ /// - All validation happens at the Node.js boundary before calling core library
78
+ ///
79
+ /// # Arguments
80
+ ///
81
+ /// * `archive_path` - Path to the archive file
82
+ /// * `output_dir` - Directory where files will be extracted
83
+ /// * `config` - Optional `SecurityConfig` (uses secure defaults if omitted)
84
+ ///
85
+ /// # Returns
86
+ ///
87
+ /// Promise resolving to `ExtractionReport` with extraction statistics
88
+ ///
89
+ /// # Errors
90
+ ///
91
+ /// Returns error for security violations or I/O errors. Error messages are
92
+ /// prefixed with error codes for discrimination in JavaScript:
93
+ ///
94
+ /// - `PATH_TRAVERSAL`: Path traversal attempt detected
95
+ /// - `SYMLINK_ESCAPE`: Symlink points outside extraction directory
96
+ /// - `HARDLINK_ESCAPE`: Hardlink target outside extraction directory
97
+ /// - `ZIP_BOMB`: Potential zip bomb detected
98
+ /// - `INVALID_PERMISSIONS`: File permissions are invalid or unsafe
99
+ /// - `QUOTA_EXCEEDED`: Resource quota exceeded
100
+ /// - `SECURITY_VIOLATION`: Security policy violation
101
+ /// - `UNSUPPORTED_FORMAT`: Archive format not supported
102
+ /// - `INVALID_ARCHIVE`: Archive is corrupted
103
+ /// - `IO_ERROR`: I/O operation failed
104
+ ///
105
+ /// # Examples
106
+ ///
107
+ /// ```javascript
108
+ /// // Use secure defaults
109
+ /// const report = await extractArchive('archive.tar.gz', '/tmp/output');
110
+ /// console.log(`Extracted ${report.filesExtracted} files`);
111
+ ///
112
+ /// // Customize security settings
113
+ /// const config = new SecurityConfig().maxFileSize(100 * 1024 * 1024);
114
+ /// const customReport = await extractArchive('archive.tar.gz', '/tmp/output', config);
115
+ /// ```
116
+ #[napi]
117
+ #[allow(clippy::needless_pass_by_value, clippy::trailing_empty_array)]
118
+ pub async fn extract_archive(
119
+ archive_path: String,
120
+ output_dir: String,
121
+ config: Option<&SecurityConfig>,
122
+ ) -> Result<ExtractionReport> {
123
+ // Validate paths at boundary
124
+ // NOTE: Defense-in-depth - paths are validated here and again in core
125
+ // library. This boundary validation catches issues early and provides
126
+ // better error messages for Node.js users.
127
+ validate_path(&archive_path)?;
128
+ validate_path(&output_dir)?;
129
+
130
+ // Get config reference or use default
131
+ let default_config = exarch_core::SecurityConfig::default();
132
+ let config_ref = config.map_or(&default_config, |c| c.as_core());
133
+
134
+ // Clone the config into an Arc so it can move across the thread boundary
135
+ let config_arc = std::sync::Arc::new(config_ref.clone());
136
+
137
+ // Run extraction on tokio thread pool
138
+ //
139
+ // NAPI-RS with tokio_rt feature uses tokio runtime for async operations.
140
+ // spawn_blocking is required because archive extraction does blocking I/O.
141
+ // This moves the work to tokio's blocking thread pool rather than
142
+ // blocking the Node.js event loop.
143
+ //
144
+ // NOTE: TOCTOU race condition - archive contents can change between
145
+ // validation and extraction. This is an accepted limitation for async I/O.
146
+ // For maximum security with untrusted archives, use extractArchiveSync()
147
+ // or ensure exclusive file access (e.g., flock) during extraction.
148
+ let report = tokio::task::spawn_blocking(move || {
149
+ exarch_core::extract_archive(&archive_path, &output_dir, &config_arc)
150
+ })
151
+ .await
152
+ .map_err(|e| Error::from_reason(format!("task execution failed: {e}")))?
153
+ .map_err(convert_error)?;
154
+
155
+ Ok(ExtractionReport::from(report))
156
+ }
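
The error codes listed in the doc comment above arrive as message prefixes on the rejected promise rather than as a dedicated error class. As a minimal illustrative sketch (not part of the crate; the helper name and exact message text are assumptions), a Rust-side caller could recover the code by matching the documented prefixes against the error's display string, the same way the tests below inspect errors via `to_string()`:

```rust
use napi::Error;

/// Hypothetical helper: map an extraction error back to one of the documented
/// code prefixes, or `None` if no known prefix is present.
fn error_code(err: &Error) -> Option<&'static str> {
    // Codes documented on `extract_archive`.
    const CODES: &[&str] = &[
        "PATH_TRAVERSAL",
        "SYMLINK_ESCAPE",
        "HARDLINK_ESCAPE",
        "ZIP_BOMB",
        "INVALID_PERMISSIONS",
        "QUOTA_EXCEEDED",
        "SECURITY_VIOLATION",
        "UNSUPPORTED_FORMAT",
        "INVALID_ARCHIVE",
        "IO_ERROR",
    ];
    let message = err.to_string();
    CODES.iter().copied().find(|&code| message.contains(code))
}
```

On the JavaScript side the equivalent check is a plain `err.message.startsWith('ZIP_BOMB')`-style comparison, since the codes are documented as message prefixes.
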
157
+
158
+ /// Extract an archive to the specified directory (sync).
159
+ ///
160
+ /// Synchronous version of `extractArchive`. Blocks the event loop until
161
+ /// extraction completes. Prefer the async version for most use cases.
162
+ ///
163
+ /// # Arguments
164
+ ///
165
+ /// * `archive_path` - Path to the archive file
166
+ /// * `output_dir` - Directory where files will be extracted
167
+ /// * `config` - Optional `SecurityConfig` (uses secure defaults if omitted)
168
+ ///
169
+ /// # Returns
170
+ ///
171
+ /// `ExtractionReport` with extraction statistics
172
+ ///
173
+ /// # Errors
174
+ ///
175
+ /// Returns error for security violations or I/O errors. See `extract_archive`
176
+ /// for error code documentation.
177
+ ///
178
+ /// # Examples
179
+ ///
180
+ /// ```javascript
181
+ /// // Use secure defaults
182
+ /// const report = extractArchiveSync('archive.tar.gz', '/tmp/output');
183
+ /// console.log(`Extracted ${report.filesExtracted} files`);
184
+ ///
185
+ /// // Customize security settings
186
+ /// const config = new SecurityConfig().maxFileSize(100 * 1024 * 1024);
187
+ /// const customReport = extractArchiveSync('archive.tar.gz', '/tmp/output', config);
188
+ /// ```
189
+ #[napi]
190
+ #[allow(clippy::needless_pass_by_value)]
191
+ pub fn extract_archive_sync(
192
+ archive_path: String,
193
+ output_dir: String,
194
+ config: Option<&SecurityConfig>,
195
+ ) -> Result<ExtractionReport> {
196
+ // Validate paths at boundary
197
+ // NOTE: Defense-in-depth - paths are validated here and again in core
198
+ // library. This boundary validation catches issues early and provides
199
+ // better error messages for Node.js users.
200
+ validate_path(&archive_path)?;
201
+ validate_path(&output_dir)?;
202
+
203
+ // Get config reference or use default
204
+ let default_config = exarch_core::SecurityConfig::default();
205
+ let config_ref = config.map_or(&default_config, |c| c.as_core());
206
+
207
+ // Run extraction synchronously
208
+ let report = exarch_core::extract_archive(&archive_path, &output_dir, config_ref)
209
+ .map_err(convert_error)?;
210
+
211
+ Ok(ExtractionReport::from(report))
212
+ }
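
The TOCTOU note above recommends holding exclusive access to the archive while it is being extracted. A minimal sketch of that idea, assuming the caller controls the archive file and using the third-party `fs2` crate for advisory locking (any flock-style wrapper would do; neither the crate nor this helper is part of exarch):

```rust
use std::fs::File;
use std::io;

use fs2::FileExt; // advisory locking (flock on Unix, LockFileEx on Windows)

/// Illustrative only: run `body` while holding an exclusive advisory lock on
/// the archive, so cooperating processes cannot swap its contents mid-extraction.
fn with_exclusive_archive<T>(path: &str, body: impl FnOnce() -> T) -> io::Result<T> {
    let file = File::open(path)?;
    file.lock_exclusive()?; // blocks until the lock is acquired
    let result = body();
    file.unlock()?; // also released when `file` is closed
    Ok(result)
}
```

A caller would invoke `extract_archive_sync` (or the async variant) inside `body`; note that advisory locks only constrain other processes that also take the lock.
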
213
+
214
+ #[cfg(test)]
215
+ #[allow(
216
+ clippy::unwrap_used,
217
+ clippy::expect_used,
218
+ clippy::uninlined_format_args,
219
+ clippy::manual_string_new
220
+ )]
221
+ mod tests {
222
+ use super::*;
223
+
224
+ #[test]
225
+ fn test_module_exports_functions() {
226
+ // This test just ensures the module compiles and exports the expected
227
+ // functions. Runtime tests would require actual archive files.
228
+ }
229
+
230
+ // CR-004: Path validation tests
231
+ #[tokio::test]
232
+ async fn test_extract_archive_rejects_null_byte_in_archive_path() {
233
+ let result = extract_archive(
234
+ "/tmp/test\0malicious.tar".to_string(),
235
+ "/tmp/output".to_string(),
236
+ None,
237
+ )
238
+ .await;
239
+
240
+ assert!(result.is_err(), "should reject null bytes in archive path");
241
+ assert!(
242
+ result.unwrap_err().to_string().contains("null bytes"),
243
+ "error message should mention null bytes"
244
+ );
245
+ }
246
+
247
+ #[tokio::test]
248
+ async fn test_extract_archive_rejects_null_byte_in_output_dir() {
249
+ let result = extract_archive(
250
+ "/tmp/test.tar".to_string(),
251
+ "/tmp/output\0malicious".to_string(),
252
+ None,
253
+ )
254
+ .await;
255
+
256
+ assert!(result.is_err(), "should reject null bytes in output path");
257
+ assert!(
258
+ result.unwrap_err().to_string().contains("null bytes"),
259
+ "error message should mention null bytes"
260
+ );
261
+ }
262
+
263
+ #[tokio::test]
264
+ async fn test_extract_archive_rejects_excessively_long_archive_path() {
265
+ let long_path = "x".repeat(5000);
266
+ let result = extract_archive(long_path, "/tmp/output".to_string(), None).await;
267
+
268
+ assert!(result.is_err(), "should reject excessively long paths");
269
+ assert!(
270
+ result.unwrap_err().to_string().contains("maximum length"),
271
+ "error message should mention length limit"
272
+ );
273
+ }
274
+
275
+ #[tokio::test]
276
+ async fn test_extract_archive_rejects_excessively_long_output_dir() {
277
+ let long_path = "x".repeat(5000);
278
+ let result = extract_archive("/tmp/test.tar".to_string(), long_path, None).await;
279
+
280
+ assert!(result.is_err(), "should reject excessively long paths");
281
+ assert!(
282
+ result.unwrap_err().to_string().contains("maximum length"),
283
+ "error message should mention length limit"
284
+ );
285
+ }
286
+
287
+ #[tokio::test]
288
+ async fn test_extract_archive_accepts_empty_paths() {
289
+ // Empty paths should be accepted at boundary validation
290
+ // Core library will handle actual path validation
291
+ let result = extract_archive("".to_string(), "".to_string(), None).await;
292
+
293
+ // If it fails, ensure it's not a boundary path validation error
294
+ // (empty paths pass boundary validation; core handles semantic validation)
295
+ if let Err(e) = result {
296
+ let err_msg = e.to_string();
297
+ assert!(
298
+ !err_msg.contains("null bytes") && !err_msg.contains("maximum length"),
299
+ "should not fail on boundary path validation, got: {}",
300
+ err_msg
301
+ );
302
+ }
303
+ // If it succeeds, boundary validation passed (which is what we're
304
+ // testing)
305
+ }
306
+
307
+ #[test]
308
+ fn test_extract_archive_sync_rejects_null_byte_in_archive_path() {
309
+ let result = extract_archive_sync(
310
+ "/tmp/test\0malicious.tar".to_string(),
311
+ "/tmp/output".to_string(),
312
+ None,
313
+ );
314
+
315
+ assert!(result.is_err(), "should reject null bytes in archive path");
316
+ assert!(
317
+ result.unwrap_err().to_string().contains("null bytes"),
318
+ "error message should mention null bytes"
319
+ );
320
+ }
321
+
322
+ #[test]
323
+ fn test_extract_archive_sync_rejects_null_byte_in_output_dir() {
324
+ let result = extract_archive_sync(
325
+ "/tmp/test.tar".to_string(),
326
+ "/tmp/output\0malicious".to_string(),
327
+ None,
328
+ );
329
+
330
+ assert!(result.is_err(), "should reject null bytes in output path");
331
+ assert!(
332
+ result.unwrap_err().to_string().contains("null bytes"),
333
+ "error message should mention null bytes"
334
+ );
335
+ }
336
+
337
+ #[test]
338
+ fn test_extract_archive_sync_rejects_excessively_long_archive_path() {
339
+ let long_path = "x".repeat(5000);
340
+ let result = extract_archive_sync(long_path, "/tmp/output".to_string(), None);
341
+
342
+ assert!(result.is_err(), "should reject excessively long paths");
343
+ assert!(
344
+ result.unwrap_err().to_string().contains("maximum length"),
345
+ "error message should mention length limit"
346
+ );
347
+ }
348
+
349
+ #[test]
350
+ fn test_extract_archive_sync_rejects_excessively_long_output_dir() {
351
+ let long_path = "x".repeat(5000);
352
+ let result = extract_archive_sync("/tmp/test.tar".to_string(), long_path, None);
353
+
354
+ assert!(result.is_err(), "should reject excessively long paths");
355
+ assert!(
356
+ result.unwrap_err().to_string().contains("maximum length"),
357
+ "error message should mention length limit"
358
+ );
359
+ }
360
+
361
+ #[test]
362
+ fn test_extract_archive_sync_accepts_valid_paths() {
363
+ // Test that valid paths pass boundary validation
364
+ // The actual extraction may succeed (empty archive) or fail (file not found)
365
+ // but should NOT fail due to path validation
366
+ let result = extract_archive_sync(
367
+ "/tmp/valid_test_path.tar".to_string(),
368
+ "/tmp/valid_output_path".to_string(),
369
+ None,
370
+ );
371
+
372
+ // If it fails, ensure it's not a path validation error
373
+ if let Err(e) = result {
374
+ let err_msg = e.to_string();
375
+ assert!(
376
+ !err_msg.contains("null bytes") && !err_msg.contains("maximum length"),
377
+ "should not fail on path validation, got: {}",
378
+ err_msg
379
+ );
380
+ }
381
+ // If it succeeds, path validation passed (which is what we're testing)
382
+ }
383
+
384
+ #[test]
385
+ fn test_extract_archive_sync_accepts_relative_paths() {
386
+ // Test that valid relative paths pass boundary validation
387
+ let result = extract_archive_sync(
388
+ "relative_test.tar".to_string(),
389
+ "relative_output".to_string(),
390
+ None,
391
+ );
392
+
393
+ // If it fails, ensure it's not a path validation error
394
+ if let Err(e) = result {
395
+ let err_msg = e.to_string();
396
+ assert!(
397
+ !err_msg.contains("null bytes") && !err_msg.contains("maximum length"),
398
+ "should not fail on path validation for relative paths, got: {}",
399
+ err_msg
400
+ );
401
+ }
402
+ // If it succeeds, path validation passed (which is what we're testing)
403
+ }
404
+
405
+ #[test]
406
+ fn test_extract_archive_sync_with_custom_config() {
407
+ let mut config = SecurityConfig::new();
408
+ config.max_file_size(1_000_000).unwrap();
409
+
410
+ // Test that valid paths pass boundary validation with custom config
411
+ let result = extract_archive_sync(
412
+ "custom_test.tar".to_string(),
413
+ "custom_output".to_string(),
414
+ Some(&config),
415
+ );
416
+
417
+ // If it fails, ensure it's not a path validation error
418
+ if let Err(e) = result {
419
+ let err_msg = e.to_string();
420
+ assert!(
421
+ !err_msg.contains("null bytes") && !err_msg.contains("maximum length"),
422
+ "should not fail on path validation, got: {}",
423
+ err_msg
424
+ );
425
+ }
426
+ // If it succeeds, path validation passed (which is what we're testing)
427
+ }
428
+ }
package/src/report.rs ADDED
@@ -0,0 +1,176 @@
1
+ //! Node.js bindings for `ExtractionReport`.
2
+
3
+ use exarch_core::ExtractionReport as CoreReport;
4
+ use napi_derive::napi;
5
+
6
+ /// Report of an archive extraction operation.
7
+ ///
8
+ /// Contains statistics and metadata about the extraction process.
9
+ #[napi(object)]
10
+ #[derive(Debug, Clone)]
11
+ pub struct ExtractionReport {
12
+ /// Number of files successfully extracted.
13
+ pub files_extracted: u32,
14
+ /// Number of directories created.
15
+ pub directories_created: u32,
16
+ /// Number of symlinks created.
17
+ pub symlinks_created: u32,
18
+ /// Total bytes written to disk.
19
+ pub bytes_written: i64,
20
+ /// Extraction duration in milliseconds.
21
+ pub duration_ms: i64,
22
+ /// Number of files skipped due to security checks.
23
+ pub files_skipped: u32,
24
+ /// List of warning messages.
25
+ pub warnings: Vec<String>,
26
+ }
27
+
28
+ impl From<CoreReport> for ExtractionReport {
29
+ #[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
30
+ fn from(report: CoreReport) -> Self {
31
+ // Use saturating conversions to prevent silent wraparound on overflow
32
+ // Oversized values clamp at the target type's maximum, so reports stay meaningful for very large extractions
33
+ Self {
34
+ files_extracted: report.files_extracted.min(u32::MAX as usize) as u32,
35
+ directories_created: report.directories_created.min(u32::MAX as usize) as u32,
36
+ symlinks_created: report.symlinks_created.min(u32::MAX as usize) as u32,
37
+ bytes_written: report.bytes_written.min(i64::MAX as u64) as i64,
38
+ duration_ms: report.duration.as_millis().min(i64::MAX as u128) as i64,
39
+ files_skipped: report.files_skipped.min(u32::MAX as usize) as u32,
40
+ warnings: report.warnings,
41
+ }
42
+ }
43
+ }
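
The conversion above clamps rather than wraps: `value.min(MAX as _) as _` acts as a saturating cast, which matters because the core report uses `usize`/`u64`/`u128` counters while the N-API object exposes `u32`/`i64`. A small self-contained sketch of the same pattern (plain std, illustrative names):

```rust
/// Saturating usize -> u32 conversion using the same `min` + `as` pattern as
/// the `From<CoreReport>` impl above. Values above `u32::MAX` clamp instead of wrapping.
fn saturate_u32(value: usize) -> u32 {
    value.min(u32::MAX as usize) as u32
}

fn main() {
    assert_eq!(saturate_u32(42), 42);
    // With a bare `as` cast on a 64-bit target this value would wrap to 4;
    // the `min` clamps it to u32::MAX instead.
    assert_eq!(saturate_u32((u32::MAX as usize).saturating_add(5)), u32::MAX);
}
```

`u32::try_from(value).unwrap_or(u32::MAX)` expresses the same intent without the `#[allow(clippy::cast_possible_truncation)]`, at the cost of a slightly longer spelling.
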
44
+
45
+ #[cfg(test)]
46
+ #[allow(clippy::unwrap_used, clippy::expect_used)]
47
+ mod tests {
48
+ use super::*;
49
+ use std::time::Duration;
50
+
51
+ #[test]
52
+ fn test_extraction_report_conversion() {
53
+ let mut core_report = CoreReport::new();
54
+ core_report.files_extracted = 10;
55
+ core_report.directories_created = 5;
56
+ core_report.symlinks_created = 2;
57
+ core_report.bytes_written = 1024;
58
+ core_report.duration = Duration::from_millis(500);
59
+ core_report.files_skipped = 1;
60
+ core_report.add_warning("Test warning".to_string());
61
+
62
+ let report = ExtractionReport::from(core_report);
63
+
64
+ assert_eq!(report.files_extracted, 10);
65
+ assert_eq!(report.directories_created, 5);
66
+ assert_eq!(report.symlinks_created, 2);
67
+ assert_eq!(report.bytes_written, 1024);
68
+ assert_eq!(report.duration_ms, 500);
69
+ assert_eq!(report.files_skipped, 1);
70
+ assert_eq!(report.warnings.len(), 1);
71
+ assert_eq!(report.warnings[0], "Test warning");
72
+ }
73
+
74
+ #[test]
75
+ fn test_extraction_report_zero_values() {
76
+ let core_report = CoreReport::new();
77
+ let report = ExtractionReport::from(core_report);
78
+
79
+ assert_eq!(report.files_extracted, 0);
80
+ assert_eq!(report.directories_created, 0);
81
+ assert_eq!(report.symlinks_created, 0);
82
+ assert_eq!(report.bytes_written, 0);
83
+ assert_eq!(report.files_skipped, 0);
84
+ assert_eq!(report.warnings.len(), 0);
85
+ }
86
+
87
+ #[test]
88
+ fn test_extraction_report_large_values() {
89
+ let mut core_report = CoreReport::new();
90
+ core_report.files_extracted = 100_000;
91
+ core_report.directories_created = 50_000;
92
+ core_report.bytes_written = 10_000_000_000; // 10 GB
93
+ core_report.duration = Duration::from_secs(3600); // 1 hour
94
+
95
+ let report = ExtractionReport::from(core_report);
96
+
97
+ assert_eq!(report.files_extracted, 100_000);
98
+ assert_eq!(report.bytes_written, 10_000_000_000);
99
+ assert_eq!(report.duration_ms, 3_600_000);
100
+ }
101
+
102
+ #[test]
103
+ fn test_extraction_report_multiple_warnings() {
104
+ let mut core_report = CoreReport::new();
105
+ core_report.add_warning("Warning 1".to_string());
106
+ core_report.add_warning("Warning 2".to_string());
107
+ core_report.add_warning("Warning 3".to_string());
108
+
109
+ let report = ExtractionReport::from(core_report);
110
+
111
+ assert_eq!(report.warnings.len(), 3);
112
+ assert_eq!(report.warnings[0], "Warning 1");
113
+ assert_eq!(report.warnings[1], "Warning 2");
114
+ assert_eq!(report.warnings[2], "Warning 3");
115
+ }
116
+
117
+ #[test]
118
+ fn test_extraction_report_duration_hours() {
119
+ let mut core_report = CoreReport::new();
120
+ core_report.duration = Duration::from_secs(7200); // 2 hours
121
+
122
+ let report = ExtractionReport::from(core_report);
123
+
124
+ assert_eq!(
125
+ report.duration_ms, 7_200_000,
126
+ "2 hours should be 7,200,000 milliseconds"
127
+ );
128
+ }
129
+
130
+ #[test]
131
+ fn test_extraction_report_duration_zero() {
132
+ let mut core_report = CoreReport::new();
133
+ core_report.duration = Duration::from_secs(0);
134
+
135
+ let report = ExtractionReport::from(core_report);
136
+
137
+ assert_eq!(report.duration_ms, 0, "zero duration should be 0 ms");
138
+ }
139
+
140
+ #[test]
141
+ fn test_extraction_report_duration_microseconds() {
142
+ let mut core_report = CoreReport::new();
143
+ core_report.duration = Duration::from_micros(1500); // 1.5 ms
144
+
145
+ let report = ExtractionReport::from(core_report);
146
+
147
+ assert_eq!(
148
+ report.duration_ms, 1,
149
+ "1500 microseconds should round to 1 millisecond"
150
+ );
151
+ }
152
+
153
+ #[test]
154
+ fn test_extraction_report_warnings_order_preserved() {
155
+ let mut core_report = CoreReport::new();
156
+ core_report.add_warning("First warning".to_string());
157
+ core_report.add_warning("Second warning".to_string());
158
+ core_report.add_warning("Third warning".to_string());
159
+
160
+ let report = ExtractionReport::from(core_report);
161
+
162
+ assert_eq!(report.warnings.len(), 3, "should have 3 warnings");
163
+ assert_eq!(
164
+ report.warnings[0], "First warning",
165
+ "first warning should be at index 0"
166
+ );
167
+ assert_eq!(
168
+ report.warnings[1], "Second warning",
169
+ "second warning should be at index 1"
170
+ );
171
+ assert_eq!(
172
+ report.warnings[2], "Third warning",
173
+ "third warning should be at index 2"
174
+ );
175
+ }
176
+ }
package/src/utils.rs ADDED
@@ -0,0 +1,84 @@
1
+ //! Utility functions for Node.js bindings.
2
+
3
+ use napi::bindgen_prelude::*;
4
+
5
+ /// Maximum path length in bytes (Linux's `PATH_MAX` is typically 4096; macOS's is 1024)
6
+ const MAX_PATH_LENGTH: usize = 4096;
7
+
8
+ /// Validates a path string for security issues.
9
+ ///
10
+ /// Rejects:
11
+ /// - Paths containing null bytes (potential injection attacks)
12
+ /// - Paths exceeding `MAX_PATH_LENGTH` bytes (`DoS` prevention)
13
+ ///
14
+ /// # Errors
15
+ ///
16
+ /// Returns error if path contains null bytes or exceeds maximum length.
17
+ pub fn validate_path(path: &str) -> Result<()> {
18
+ // Use constant-time null byte check to prevent timing side-channel attacks
19
+ // The fold operation processes every byte regardless of when null is found
20
+ let has_null = path.bytes().fold(false, |acc, b| acc | (b == 0));
21
+
22
+ if has_null {
23
+ return Err(Error::from_reason(
24
+ "path contains null bytes - potential security issue",
25
+ ));
26
+ }
27
+
28
+ if path.len() > MAX_PATH_LENGTH {
29
+ // Pre-allocate string to avoid multiple allocations
30
+ use std::fmt::Write;
31
+ let mut msg = String::with_capacity(80);
32
+ // Writing to a String never fails
33
+ let _ = write!(
34
+ &mut msg,
35
+ "path exceeds maximum length of {MAX_PATH_LENGTH} bytes (got {} bytes)",
36
+ path.len()
37
+ );
38
+ return Err(Error::from_reason(msg));
39
+ }
40
+
41
+ Ok(())
42
+ }
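
The fold in `validate_path` exists so the scan's duration depends only on the path length, not on where a null byte appears. A minimal side-by-side sketch of the two spellings the comment is contrasting (illustrative only; these helpers are not part of the crate):

```rust
/// Short-circuiting check: returns as soon as the first null byte is seen, so
/// its running time reveals roughly where that byte sits.
fn has_null_short_circuit(path: &str) -> bool {
    path.bytes().any(|b| b == 0)
}

/// Fixed-scan check, the form `validate_path` uses: the fold visits every byte,
/// so the time taken depends only on the input length.
fn has_null_fixed_scan(path: &str) -> bool {
    path.bytes().fold(false, |acc, b| acc | (b == 0))
}

fn main() {
    assert!(has_null_short_circuit("bad\0path"));
    assert!(has_null_fixed_scan("bad\0path"));
    assert!(!has_null_fixed_scan("/tmp/archive.tar.gz"));
}
```

The optimizer is free to rewrite either form, so the fold is best read as a best-effort hardening measure rather than a strict constant-time guarantee.
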
43
+
44
+ #[cfg(test)]
45
+ #[allow(clippy::unwrap_used, clippy::expect_used)]
46
+ mod tests {
47
+ use super::*;
48
+
49
+ #[test]
50
+ fn test_validate_path_accepts_normal() {
51
+ assert!(
52
+ validate_path("/tmp/test.tar.gz").is_ok(),
53
+ "absolute paths should be accepted"
54
+ );
55
+ assert!(
56
+ validate_path("relative/path.tar").is_ok(),
57
+ "relative paths should be accepted"
58
+ );
59
+ // Empty path is valid - callers may provide empty strings for defaults
60
+ // or optional parameters. Core library handles empty path validation.
61
+ assert!(validate_path("").is_ok(), "empty paths should be accepted");
62
+ }
63
+
64
+ #[test]
65
+ fn test_validate_path_rejects_null_bytes() {
66
+ let result = validate_path("/tmp/test\0malicious");
67
+ assert!(result.is_err());
68
+ assert!(result.unwrap_err().to_string().contains("null bytes"));
69
+ }
70
+
71
+ #[test]
72
+ fn test_validate_path_rejects_too_long() {
73
+ let long_path = "x".repeat(MAX_PATH_LENGTH + 1);
74
+ let result = validate_path(&long_path);
75
+ assert!(result.is_err());
76
+ assert!(result.unwrap_err().to_string().contains("maximum length"));
77
+ }
78
+
79
+ #[test]
80
+ fn test_validate_path_accepts_max_length() {
81
+ let max_path = "x".repeat(MAX_PATH_LENGTH);
82
+ assert!(validate_path(&max_path).is_ok());
83
+ }
84
+ }